[v4] xen/arm: flush icache as well when XEN_DOMCTL_cacheflush is issued

Message ID 20170127182523.17054-1-tamas.lengyel@zentific.com (mailing list archive)
State New, archived

Commit Message

Tamas Lengyel Jan. 27, 2017, 6:25 p.m. UTC
When the toolstack modifies memory of a running ARM VM it may happen
that the underlying memory of a current vCPU PC is changed. Without
flushing the icache the vCPU may continue executing stale instructions.

Also expose xc_domain_cacheflush through xenctrl.h.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Julien Grall <julien.grall@arm.com>

Note: patch has been verified to solve stale icache issues on the
      HiKey platform.

v4: Fix commit message
v3: Flush the entire icache instead of flush by VA
v2: Return 0 on x86 and clarify comment in xenctrl.h
---
 tools/libxc/include/xenctrl.h |  8 ++++++++
 tools/libxc/xc_domain.c       |  6 +++---
 tools/libxc/xc_private.h      |  3 ---
 xen/arch/arm/mm.c             | 10 ++++++++++
 4 files changed, 21 insertions(+), 6 deletions(-)

Comments

Stefano Stabellini Jan. 27, 2017, 8:41 p.m. UTC | #1
On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
> When the toolstack modifies memory of a running ARM VM it may happen
> that the underlying memory of a current vCPU PC is changed. Without
> flushing the icache the vCPU may continue executing stale instructions.
> 
> Also expose xc_domain_cacheflush through xenctrl.h.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
> Acked-by: Wei Liu <wei.liu2@citrix.com>
> ---
> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> Cc: Julien Grall <julien.grall@arm.com>
> 
> Note: patch has been verified to solve stale icache issues on the
>       HiKey platform.

Sorry to come in the discussion late, but there has been a lot of
traffic on this in the last two days. The patch is clear and well
written, thanks for that. However, I am concerned about the performance
impact of the icache flush.

What stale icache issues is it trying to solve?

This patch introduces the icache flush in flush_page_to_ram, which is
called in two instances:

1) arch_do_domctl(XEN_DOMCTL_cacheflush) -> p2m_cache_flush -> flush_page_to_ram

2) alloc_xenheap_pages

It looks like we don't need the icache flush in 2). We should probably
avoid icache flushes for that. Julien, do you agree?

I am also wondering about all the libxc/libxl callers, many of which
don't need an icache flush. Ideally we would have an argument in
XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
similar to GNTTABOP_cache_flush.
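For illustration, such an interface might look roughly like the sketch
below. The flags field and the constant name are hypothetical additions
for this discussion, not part of this patch; today's struct only
carries start_pfn and nr_pfns:

    /* Hypothetical extension of the domctl argument (sketch only). */
    struct xen_domctl_cacheflush {
        /* IN: page range to flush. */
        xen_pfn_t start_pfn, nr_pfns;
        /* IN: type of flush, mirroring GNTTABOP_cache_flush's flags.
         * 0 keeps today's behaviour (dcache clean & invalidate). */
    #define XEN_DOMCTL_CACHEFLUSH_ICACHE  (1u << 0)
        uint32_t flags;
    };

A caller that only touches data would pass flags = 0, while a
mem_access-style caller that patches instructions would set
XEN_DOMCTL_CACHEFLUSH_ICACHE.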


> v4: Fix commit message
> v3: Flush the entire icache instead of flush by VA
> v2: Return 0 on x86 and clarify comment in xenctrl.h
> ---
>  tools/libxc/include/xenctrl.h |  8 ++++++++
>  tools/libxc/xc_domain.c       |  6 +++---
>  tools/libxc/xc_private.h      |  3 ---
>  xen/arch/arm/mm.c             | 10 ++++++++++
>  4 files changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index 63c616ff6a..a2f23fcd5a 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2720,6 +2720,14 @@ int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
>  
> +/*
> + * Ensure cache coherency after memory modifications. A call to this function
> + * is only required on ARM as the x86 architecture provides cache coherency
> + * guarantees. Calling this function on x86 is allowed but has no effect.
> + */
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
> +
>  /* Compat shims */
>  #include "xenctrl_compat.h"
>  
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 296b8523b5..98ab6ba3fd 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -74,10 +74,10 @@ int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
>      /*
>       * The x86 architecture provides cache coherency guarantees which prevent
>       * the need for this hypercall.  Avoid the overhead of making a hypercall
> -     * just for Xen to return -ENOSYS.
> +     * just for Xen to return -ENOSYS.  It is safe to ignore this call on x86
> +     * so we just return 0.
>       */
> -    errno = ENOSYS;
> -    return -1;
> +    return 0;
>  #else
>      DECLARE_DOMCTL;
>      domctl.cmd = XEN_DOMCTL_cacheflush;
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 97445ae1fe..fddebdc917 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -366,9 +366,6 @@ void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
>  /* Optionally flush file to disk and discard page cache */
>  void discard_file_cache(xc_interface *xch, int fd, int flush);
>  
> -int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> -			 xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
> -
>  #define MAX_MMU_UPDATES 1024
>  struct xc_mmu {
>      mmu_update_t updates[MAX_MMU_UPDATES];
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 99588a330d..596283fc99 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -390,6 +390,16 @@ void flush_page_to_ram(unsigned long mfn)
>  
>      clean_and_invalidate_dcache_va_range(v, PAGE_SIZE);
>      unmap_domain_page(v);
> +
> +    /*
> + * For some types of instruction cache (such as VIPT), the entire I-Cache
> +     * needs to be flushed to guarantee that all the aliases of a given
> +     * physical address will be removed from the cache.
> +     * Invalidating the I-Cache by VA highly depends on the behavior of the
> +     * I-Cache (See D4.9.2 in ARM DDI 0487A.k_iss10775). Instead of using flush
> +     * by VA on select platforms, we just flush the entire cache here.
> +     */
> +    invalidate_icache();
>  }
>  
>  void __init arch_init_memory(void)
> -- 
> 2.11.0
>
Tamas Lengyel Jan. 27, 2017, 8:54 p.m. UTC | #2
On Fri, Jan 27, 2017 at 1:41 PM, Stefano Stabellini
<sstabellini@kernel.org> wrote:
> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
>> When the toolstack modifies memory of a running ARM VM it may happen
>> that the underlying memory of a current vCPU PC is changed. Without
>> flushing the icache the vCPU may continue executing stale instructions.
>>
>> Also expose xc_domain_cacheflush through xenctrl.h.
>>
>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
>> Acked-by: Wei Liu <wei.liu2@citrix.com>
>> ---
>> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
>> Cc: Stefano Stabellini <sstabellini@kernel.org>
>> Cc: Julien Grall <julien.grall@arm.com>
>>
>> Note: patch has been verified to solve stale icache issues on the
>>       HiKey platform.
>
> Sorry to come in the discussion late, but there has been a lot of
> traffic on this in the last two days. The patch is clear and well
> written, thanks for that. However, I am concerned about the performance
> impact of the icache flush.
>
> What stale icache issues is it trying to solve?

The case where it is very easy to trigger a stale icache is during an
SMC trap on multi-core boards (like the HiKey). If the monitor
application removes the SMC from memory in the callback (through
xc_map_foreign_range), the SMC instruction will still be executed
repeatedly afterwards until the CPU eventually fetches the actual
contents of the memory. The same icache issue could get triggered any
time the user modifies instructions in the memory of an active VM.
But flushing the dcache would also be required any time memory is
modified anywhere, so exposing the libxc function would be necessary
even without the icache issue.
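To make the usage concrete, the monitor-side sequence might look
roughly like the sketch below (error handling trimmed; remove_smc and
the gfn/offset bookkeeping are illustrative, not code from an actual
monitor):

    #include <string.h>
    #include <sys/mman.h>
    #include <xenctrl.h>

    /* Restore the original bytes over a previously injected SMC, then
     * flush so the vCPU cannot keep executing the stale instruction. */
    static int remove_smc(xc_interface *xch, uint32_t domid,
                          xen_pfn_t gfn, size_t offset,
                          const uint8_t *orig, size_t len)
    {
        void *page = xc_map_foreign_range(xch, domid, XC_PAGE_SIZE,
                                          PROT_READ | PROT_WRITE, gfn);
        if ( !page )
            return -1;

        memcpy((uint8_t *)page + offset, orig, len);
        munmap(page, XC_PAGE_SIZE);

        /* Cleans the dcache and, with this patch applied, also
         * invalidates the icache. */
        return xc_domain_cacheflush(xch, domid, gfn, 1);
    }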

Speaking of which, after reading
https://events.linuxfoundation.org/sites/events/files/slides/slides_17.pdf,
shouldn't the cache flush happen both before and after memory is
modified (as shown on slide 33)?

>
> This patch introduces the icache flush in flush_page_to_ram, which is
> called in two instances:
>
> 1) arch_do_domctl(XEN_DOMCTL_cacheflush) -> p2m_cache_flush -> flush_page_to_ram
>
> 2) alloc_xenheap_pages
>
> It looks like we don't need the icache flush in 2). We should probably
> avoid icache flushes for that. Julien, do you agree?
>
> I am also wondering about all the libxc/libxl callers, many of which
> don't need an icache flush. Ideally we would have an argument in
> XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
> similar to GNTTABOP_cache_flush.
>

I'm OK with that, though that would probably require a bump in
XEN_DOMCTL_INTERFACE_VERSION.

Tamas
Julien Grall Jan. 27, 2017, 9:01 p.m. UTC | #3
Hi Stefano,

On 27/01/2017 20:41, Stefano Stabellini wrote:
> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
>> When the toolstack modifies memory of a running ARM VM it may happen
>> that the underlying memory of a current vCPU PC is changed. Without
>> flushing the icache the vCPU may continue executing stale instructions.
>>
>> Also expose xc_domain_cacheflush through xenctrl.h.
>>
>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
>> Acked-by: Wei Liu <wei.liu2@citrix.com>
>> ---
>> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
>> Cc: Stefano Stabellini <sstabellini@kernel.org>
>> Cc: Julien Grall <julien.grall@arm.com>
>>
>> Note: patch has been verified to solve stale icache issues on the
>>       HiKey platform.
>
> Sorry to come in the discussion late, but there has been a lot of
> traffic on this in the last two days. The patch is clear and well
> written, thanks for that. However, I am concerned about the performance
> impact of the icache flush.
>
> What stale icache issues is it trying to solve?

The icache issue for Tamas is when the monitor application is writing 
into the guest memory.

Even if we put aside this problem, the instruction cache is not able to 
snoop into the data cache. So if we don't invalidate the instruction 
cache, the guest may see stale instructions when booting, or when 
requesting memory and then using it (e.g. ballooning).

>
> This patch introduces the icache flush in flush_page_to_ram, which is
> called in two instances:
>
> 1) arch_do_domctl(XEN_DOMCTL_cacheflush) -> p2m_cache_flush -> flush_page_to_ram
>
> 2) alloc_xenheap_pages
>
> It looks like we don't need the icache flush in 2). We should probably
> avoid icache flushes for that. Julien, do you agree?

I disagree, in both cases the full icache flush is necessary. As Tamas 
wrote in the comment, it is not possible to invalidate by VA because it 
does not ensure that all aliases will be removed in a non-PIPT cache.

For the first instance, we could avoid the icache flush in each DOMCTL 
by flushing the instruction cache before the domain is running for the 
first time.

For the second instance, we have no other choice.

>
> I am also wondering about all the libxc/libxl callers, many of which
> don't need an icache flush. Ideally we would have an argument in
> XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
> similar to GNTTABOP_cache_flush.

Unless the instruction cache is flushed before the guest boots, all the 
callers of DOMCTL_cacheflush will require the invalidation.

Looking at GNTTABOP_cache_flush, I think we will also need to invalidate 
the instruction cache (or at least partially).

Regards,
Julien Grall Jan. 27, 2017, 9:23 p.m. UTC | #4
Hi,

On 27/01/2017 20:54, Tamas K Lengyel wrote:
> On Fri, Jan 27, 2017 at 1:41 PM, Stefano Stabellini
> <sstabellini@kernel.org> wrote:
>> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
>>> When the toolstack modifies memory of a running ARM VM it may happen
>>> that the underlying memory of a current vCPU PC is changed. Without
>>> flushing the icache the vCPU may continue executing stale instructions.
>>>
>>> Also expose xc_domain_cacheflush through xenctrl.h.
>>>
>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
>>> Acked-by: Wei Liu <wei.liu2@citrix.com>
>>> ---
>>> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
>>> Cc: Stefano Stabellini <sstabellini@kernel.org>
>>> Cc: Julien Grall <julien.grall@arm.com>
>>>
>>> Note: patch has been verified to solve stale icache issues on the
>>>       HiKey platform.
>>
>> Sorry to come in the discussion late, but there has been a lot of
>> traffic on this in the last two days. The patch is clear and well
>> written, thanks for that. However, I am concerned about the performance
>> impact of the icache flush.
>>
>> What stale icache issues is it trying to solve?
>
> The case where it is very easy to trigger a stale icache is during an
> SMC trap on multi-core boards (like the HiKey). If the monitor
> application removes the SMC from memory in the callback (through
> xc_map_foreign_range), the SMC instruction will still be executed
> repeatedly afterwards until the CPU eventually fetches the actual
> contents of the memory. The same icache issue could get triggered any
> time the user modifies instructions in the memory of an active VM.
> But flushing the dcache would also be required any time memory is
> modified anywhere, so exposing the libxc function would be necessary
> even without the icache issue.
>
> Speaking of which, after reading
> https://events.linuxfoundation.org/sites/events/files/slides/slides_17.pdf,
> shouldn't the cache flush happen both before and after memory is
> modified (as shown on slide 33)?

I have CCed Mark Rutland, who wrote the slides, to confirm. The example 
was about doing non-coherent DMA, where the device relies on the OS to 
do the cache maintenance for the DMA operation.

In our case, the tools are mapping the region cacheable, so there is no 
need to do the cache flush before modifying the memory.

Although, an issue I could see is if a guest vCPU is running with the 
cache disabled (other than during early boot). In that case, we have the 
same memory region mapped with different cacheability attributes. But I 
think it would be a real mess and not really helpful, as PV protocols 
would not work.

Cheers,
Stefano Stabellini Jan. 27, 2017, 11:53 p.m. UTC | #5
On Fri, 27 Jan 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 27/01/2017 20:41, Stefano Stabellini wrote:
> > On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
> > > When the toolstack modifies memory of a running ARM VM it may happen
> > > that the underlying memory of a current vCPU PC is changed. Without
> > > flushing the icache the vCPU may continue executing stale instructions.
> > > 
> > > Also expose xc_domain_cacheflush through xenctrl.h.
> > > 
> > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
> > > Acked-by: Wei Liu <wei.liu2@citrix.com>
> > > ---
> > > Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> > > Cc: Stefano Stabellini <sstabellini@kernel.org>
> > > Cc: Julien Grall <julien.grall@arm.com>
> > > 
> > > Note: patch has been verified to solve stale icache issues on the
> > >       HiKey platform.
> > 
> > Sorry to come in the discussion late, but there has been a lot of
> > traffic on this in the last two days. The patch is clear and well
> > written, thanks for that. However, I am concerned about the performance
> > impact of the icache flush.
> > 
> > What stale icache issues is it trying to solve?
> 
> The icache issue for Tamas is when the monitor application is writing into the
> guest memory.
> 
> Even if we put aside this problem, the instruction cache is not able to snoop
> into the data cache. So if we don't invalidate the instruction cache, the
> guest may see stale instructions when booting, or when requesting memory and
> then using it (e.g. ballooning).

I understand. We probably need to add an icache flush on certain
ballooning operations.


> > This patch introduces the icache flush in flush_page_to_ram, which is
> > called in two instances:
> > 
> > 1) arch_do_domctl(XEN_DOMCTL_cacheflush) -> p2m_cache_flush ->
> > flush_page_to_ram
> > 
> > 2) alloc_xenheap_pages
> > 
> > It looks like we don't need the icache flush in 2). We should probably
> > avoid icache flushes for that. Julien, do you agree?
> 
> I disagree, in both cases the full icache flush is necessary. As Tamas wrote
> in the comment, it is not possible to invalidate by VA because it does not
> ensure that all aliases will be removed in a non-PIPT cache.
> 
> For the first instance, we could avoid the icache flush in each DOMCTL by
> flushing the instruction cache before the domain is running for the first
> time.

This seems like a good way forward.


> For the second instance, we have no other choice.

Most alloc_heap_pages (alloc_xenheap_pages and alloc_domheap_pages) are
done at domain initialization, so they would also be taken care of by
flushing the instruction cache before the domain is running. There are
only very few exceptions to that, the main one being ballooning, and we
need another icache flush in that case. But I think we should avoid
doing global icache flushes every time alloc_heap_pages is called.


> > I am also wondering about all the libxc/libxl callers, many of which
> > don't need an icache flush. Ideally we would have an argument in
> > XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
> > similar to GNTTABOP_cache_flush.
> 
> Unless the instruction cache is flushed before the guest boots, all the
> callers of DOMCTL_cacheflush will require the invalidation.

DOMCTL_cacheflush is called several times during the domain build; it is
certainly better to do the icache flush once, rather than many times.

If we decide to perform one icache flush at domain creation in Xen at
the right time, we still need XEN_DOMCTL_cacheflush in its current form
to flush the dcache.

Also we still need a way to flush the icache to solve Tamas' problem
with mem_access at run time.

As a consequence, even after introducing one icache flush in Xen at
domain creation, I think we still need a new "icache flush" flag in
XEN_DOMCTL_cacheflush: all the current callers would not use it, but
mem_access userspace components will be able to use it.


> Looking at GNTTABOP_cache_flush, I think we will also need to invalidate the
> instruction cache (or at least partially).

GNTTABOP_cache_flush is used for DMA coherency with devices. Maybe there
are cases where an icache flush might be needed, but Linux doesn't use
it that way. In fact, if Linux needed an icache flush, it would need it
on native as well, but it does not.

For completeness, we could add a flag to the hypercall to request an
icache flush, but today it would be left unused.
Andrew Cooper Jan. 28, 2017, 5:04 p.m. UTC | #6
On 27/01/17 23:53, Stefano Stabellini wrote:
> On Fri, 27 Jan 2017, Julien Grall wrote:
>> On 27/01/2017 20:41, Stefano Stabellini wrote:
>> For the second instance, we have no other choice.
> Most alloc_heap_pages (alloc_xenheap_pages and alloc_domheap_pages) are
> done at domain initialization, so they would also be taken care of by
> flushing the instruction cache before the domain is running. There are
> only very few exceptions to that, the main one being ballooning, and we
> need another icache flush in that case. But I think we should avoid
> doing global icache flushes every time alloc_heap_pages is called.

There is now d->creation_finished, which becomes true when the system
controller pause count first drops to 0 (i.e. the point at which the
vcpus can first start executing).

This is used in the common memory code (populate_physmap() specifically)
to avoid TLB flushes.  I'd expect ARM can reuse it to avoid cache
flushes during domain construction as well.
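A rough sketch of how ARM could use it (the sync_icache parameter and
the hook are assumptions for this discussion, not existing code):

    /* Let callers say whether icache maintenance is needed. */
    void flush_page_to_ram(unsigned long mfn, bool sync_icache)
    {
        void *v = map_domain_page(_mfn(mfn));

        clean_and_invalidate_dcache_va_range(v, PAGE_SIZE);
        unmap_domain_page(v);

        if ( sync_icache )
            invalidate_icache();
    }

    /* Run once when d->creation_finished first becomes true: a single
     * global invalidation covers everything written while building. */
    void domain_creation_finished(struct domain *d)
    {
        invalidate_icache();
    }

Hot paths that only run before the domain has ever executed (e.g.
populate_physmap() during construction) could then pass
sync_icache = false and rely on the one-off flush at first unpause.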

~Andrew
Julien Grall Jan. 30, 2017, 4:01 p.m. UTC | #7
Hi Stefano,

On 27/01/17 23:53, Stefano Stabellini wrote:
> On Fri, 27 Jan 2017, Julien Grall wrote:
>> On 27/01/2017 20:41, Stefano Stabellini wrote:
>>> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
>> For the second instance, we have no other choice.
>
> Most alloc_heap_pages (alloc_xenheap_pages and alloc_domheap_pages) are
> done at domain initialization, so they would also be taken care of by
> flushing the instruction cache before the domain is running. There are
> only very few exceptions to that, the main one being ballooning, and we
> need another icache flush in that case. But I think we should avoid
> doing global icache flushes every time alloc_heap_pages is called.

The invalidation of the full icache is unfortunate but necessary with a 
non-PIPT cache. For a PIPT cache you could do the invalidation by VA.

Limiting the number of calls is a nice optimization, but we need to be 
careful how to do it. Until then, a full icache invalidation (or by VA 
for PIPT) is the best solution.
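For reference, the two flavours of invalidation look roughly like this
on AArch64 (a sketch; Xen's actual helpers may differ in detail):

    /* Full invalidation -- correct for any icache type. */
    static inline void invalidate_icache(void)
    {
        asm volatile ("ic ialluis" ::: "memory"); /* all, inner shareable */
        dsb(ish);
        isb();
    }

    /* By-VA invalidation -- only safe with a PIPT icache, but it only
     * touches the lines covering [p, p + size). */
    static inline void invalidate_icache_va_range(const void *p, size_t size)
    {
        uint64_t ctr;
        size_t off, line;

        /* CTR_EL0.IminLine gives the smallest icache line, in words. */
        asm volatile ("mrs %0, ctr_el0" : "=r" (ctr));
        line = 4 << (ctr & 0xf);

        for ( off = 0; off < size; off += line )
            asm volatile ("ic ivau, %0" :: "r" (p + off) : "memory");

        dsb(ish);
        isb();
    }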

>>> I am also wondering about all the libxc/libxl callers, many of which
>>> don't need an icache flush. Ideally we would have an argument in
>>> XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
>>> similar to GNTTABOP_cache_flush.
>>
>> Unless the instruction cache is flushed before the guest boots, all the
>> callers of DOMCTL_cacheflush will require the invalidation.
>
> DOMCTL_cacheflush is called several times during the domain build; it is
> certainly better to do the icache flush once, rather than many times.
>
> If we decide to perform one icache flush at domain creation in Xen at
> the right time, we still need XEN_DOMCTL_cacheflush in its current form
> to flush the dcache.
>
> Also we still need a way to flush the icache to solve Tamas' problem
> with mem_access at run time.
>
> As a consequence, even after introducing one icache flush in Xen at
> domain creation, I think we still need a new "icache flush" flag in
> XEN_DOMCTL_cacheflush: all the current callers would not use it, but
> mem_access userspace components will be able to use it.

Why do we need a flag? No matter how the flag is defined (set -> 
icache invalidation vs set -> no icache invalidation), a user will 
likely misuse it. The hypervisor is best placed to decide whether the 
icache flush is needed. Aside from domain building time, 99.9% (not to 
say 100%) of the calls to cacheflush will require the invalidation of 
the icache.

Furthermore, if you implement the optimization for invalidating a PIPT 
icache (e.g. by VA rather than in full), a user will not have the 
possibility to invalidate the full icache.

So I would go the same way as has been done for tlbflush (see bbb17f6 
"move TLB-flush filtering out into populate_physmap during vm 
creation"). Let Xen decide when to optimize the invalidation.

>
>
>> Looking at GNTTABOP_cache_flush, I think we will also need to invalidate the
>> instruction cache (or at least partially).

Cheers,
Tamas Lengyel Jan. 30, 2017, 5:20 p.m. UTC | #8
On Mon, Jan 30, 2017 at 9:01 AM, Julien Grall <julien.grall@arm.com> wrote:
> Hi Stefano,
>
> On 27/01/17 23:53, Stefano Stabellini wrote:
>>
>> On Fri, 27 Jan 2017, Julien Grall wrote:
>>>
>>> On 27/01/2017 20:41, Stefano Stabellini wrote:
>>>>
>>>> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
>>>
>>> For the second instance, we have no other choice.
>>
>>
>> Most alloc_heap_pages (alloc_xenheap_pages and alloc_domheap_pages) are
>> done at domain initialization, so they would also be taken care by
>> flushing the instruction cache before the domain is running. There are
>> only very few exceptions to that, the main one being ballooning, and we
>> need another icache flush in that case. But I think we should avoid
>> doing global icache flushes every time alloc_heap_pages is called.
>
>
> The invalidation of the full icache is unfortunate but necessary with a
> non-PIPT cache. For a PIPT cache you could do the invalidation by VA.
>
> Limiting the number of calls is a nice optimization, but we need to be
> careful how to do it. Until then, a full icache invalidation (or by VA for
> PIPT) is the best solution.
>
>>>> I am also wondering about all the libxc/libxl callers, many of which
>>>> don't need an icache flush. Ideally we would have an argument in
>>>> XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
>>>> similar to GNTTABOP_cache_flush.
>>>
>>>
>>> Unless the instruction cache is flushed before the guest boots, all
>>> the callers of DOMCTL_cacheflush will require the invalidation.
>>
>>
>> DOMCTL_cacheflush is called several times during the domain build; it is
>> certainly better to do the icache flush once, rather than many times.
>>
>> If we decide to perform one icache flush at domain creation in Xen at
>> the right time, we still need XEN_DOMCTL_cacheflush in its current form
>> to flush the dcache.
>>
>> Also we still need a way to flush the icache to solve Tamas' problem
>> with mem_access at run time.
>>
>> As a consequence, even after introducing one icache flush in Xen at
>> domain creation, I think we still need a new "icache flush" flag in
>> XEN_DOMCTL_cacheflush: all the current callers would not use it, but
>> mem_access userspace components will be able to use it.
>
>
> Why do we need a flag? No matter how the flag is defined (set -> icache
> invalidation vs set -> no icache invalidation), a user will likely misuse
> it. The hypervisor is best placed to decide whether the icache flush is
> needed. Aside from domain building time, 99.9% (not to say 100%) of the
> calls to cacheflush will require the invalidation of the icache.
>
> Furthermore, if you implement the optimization for invalidating a PIPT
> icache (e.g. by VA rather than in full), a user will not have the
> possibility to invalidate the full icache.
>
> So I would go the same way as has been done for tlbflush (see bbb17f6
> "move TLB-flush filtering out into populate_physmap during vm creation").
> Let Xen decide when to optimize the invalidation.
>

Giving the user the option during the domctl to choose which cache to
flush I think would be fine. I would agree that the most likely case
is that both caches should be flushed at the same time, but who knows,
having the option may be useful for someone. At least the Linux
syscall has that option as well
(http://man7.org/linux/man-pages/man2/cacheflush.2.html).

At any rate, I already made the patch that implements it:
https://github.com/tklengyel/xen/compare/icache?expand=1. Let me know
if you would want me to send it or not.

Thanks,
Tamas
Stefano Stabellini Jan. 30, 2017, 9:16 p.m. UTC | #9
On Mon, 30 Jan 2017, Tamas K Lengyel wrote:
> On Mon, Jan 30, 2017 at 9:01 AM, Julien Grall <julien.grall@arm.com> wrote:
> > Hi Stefano,
> >
> > On 27/01/17 23:53, Stefano Stabellini wrote:
> >>
> >> On Fri, 27 Jan 2017, Julien Grall wrote:
> >>>
> >>> On 27/01/2017 20:41, Stefano Stabellini wrote:
> >>>>
> >>>> On Fri, 27 Jan 2017, Tamas K Lengyel wrote:
> >>>
> >>> For the second instance, we have no other choice.
> >>
> >>
> >> Most alloc_heap_pages (alloc_xenheap_pages and alloc_domheap_pages) are
> >> done at domain initialization, so they would also be taken care of by
> >> flushing the instruction cache before the domain is running. There are
> >> only very few exceptions to that, the main one being ballooning, and we
> >> need another icache flush in that case. But I think we should avoid
> >> doing global icache flushes every time alloc_heap_pages is called.
> >
> >
> > The invalidation of the full icache is unfortunate but necessary with a
> > non-PIPT cache. For a PIPT cache you could do the invalidation by VA.
> >
> > Limiting the number of calls is a nice optimization, but we need to be
> > careful how to do it. Until then, a full icache invalidation (or by VA for
> > PIPT) is the best solution.
> >
> >>>> I am also wondering about all the libxc/libxl callers, many of which
> >>>> don't need an icache flush. Ideally we would have an argument in
> >>>> XEN_DOMCTL_cacheflush to specify the type of cache flush we need,
> >>>> similar to GNTTABOP_cache_flush.
> >>>
> >>>
> >>> Unless the instruction cache is flushed before the guest boots, all
> >>> the callers of DOMCTL_cacheflush will require the invalidation.
> >>
> >>
> >> DOMCTL_cacheflush is called several times during the domain build; it is
> >> certainly better to do the icache flush once, rather than many times.
> >>
> >> If we decide to perform one icache flush at domain creation in Xen at
> >> the right time, we still need XEN_DOMCTL_cacheflush in its current form
> >> to flush the dcache.
> >>
> >> Also we still need a way to flush the icache to solve Tamas' problem
> >> with mem_access at run time.
> >>
> >> As a consequence, even after introducing one icache flush in Xen at
> >> domain creation, I think we still need a new "icache flush" flag in
> >> XEN_DOMCTL_cacheflush: all the current callers would not use it, but
> >> mem_access userspace components will be able to use it.
> >
> >
> > Why do we need a flag? No matter how the flag is defined (set -> icache
> > invalidation vs set -> no icache invalidation), a user will likely misuse
> > it. The hypervisor is best placed to decide whether the icache flush is
> > needed. Aside from domain building time, 99.9% (not to say 100%) of the
> > calls to cacheflush will require the invalidation of the icache.
> >
> > Furthermore, if you implement the optimization for invalidating a PIPT
> > icache (e.g. by VA rather than in full), a user will not have the
> > possibility to invalidate the full icache.
> >
> > So I would go the same way as has been done for tlbflush (see bbb17f6
> > "move TLB-flush filtering out into populate_physmap during vm creation").
> > Let Xen decide when to optimize the invalidation.
> >
> 
> Giving the user the option during the domctl to choose which cache to
> flush I think would be fine. I would agree that the most likely case
> is that both caches should be flushed at the same time, but who knows,
> having the option may be useful for someone. At least the Linux
> syscall has that option as well
> (http://man7.org/linux/man-pages/man2/cacheflush.2.html).
> 
> At any rate, I already made the patch that implements it:
> https://github.com/tklengyel/xen/compare/icache?expand=1. Let me know
> if you would want me to send it or not.

Thanks Tamas. I'll take your v4 patch as is. Then, we'll figure out
what's the best way to avoid flushing the icache too often, especially
at VM creation time.

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 63c616ff6a..a2f23fcd5a 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2720,6 +2720,14 @@  int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
 
+/*
+ * Ensure cache coherency after memory modifications. A call to this function
+ * is only required on ARM as the x86 architecture provides cache coherency
+ * guarantees. Calling this function on x86 is allowed but has no effect.
+ */
+int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
+                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 296b8523b5..98ab6ba3fd 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -74,10 +74,10 @@  int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
     /*
      * The x86 architecture provides cache coherency guarantees which prevent
      * the need for this hypercall.  Avoid the overhead of making a hypercall
-     * just for Xen to return -ENOSYS.
+     * just for Xen to return -ENOSYS.  It is safe to ignore this call on x86
+     * so we just return 0.
      */
-    errno = ENOSYS;
-    return -1;
+    return 0;
 #else
     DECLARE_DOMCTL;
     domctl.cmd = XEN_DOMCTL_cacheflush;
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 97445ae1fe..fddebdc917 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -366,9 +366,6 @@  void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
 /* Optionally flush file to disk and discard page cache */
 void discard_file_cache(xc_interface *xch, int fd, int flush);
 
-int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
-			 xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
-
 #define MAX_MMU_UPDATES 1024
 struct xc_mmu {
     mmu_update_t updates[MAX_MMU_UPDATES];
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 99588a330d..596283fc99 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -390,6 +390,16 @@  void flush_page_to_ram(unsigned long mfn)
 
     clean_and_invalidate_dcache_va_range(v, PAGE_SIZE);
     unmap_domain_page(v);
+
+    /*
+ * For some types of instruction cache (such as VIPT), the entire I-Cache
+     * needs to be flushed to guarantee that all the aliases of a given
+     * physical address will be removed from the cache.
+     * Invalidating the I-Cache by VA highly depends on the behavior of the
+     * I-Cache (See D4.9.2 in ARM DDI 0487A.k_iss10775). Instead of using flush
+     * by VA on select platforms, we just flush the entire cache here.
+     */
+    invalidate_icache();
 }
 
 void __init arch_init_memory(void)