
[V1,09/16] arm/ioreq: Introduce arch specific bits for IOREQ/DM features

Message ID 1599769330-17656-10-git-send-email-olekstysh@gmail.com (mailing list archive)
State Superseded
Series IOREQ feature (+ virtio-mmio) on Arm

Commit Message

Oleksandr Tyshchenko Sept. 10, 2020, 8:22 p.m. UTC
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>

This patch adds basic IOREQ/DM support on Arm. The subsequent
patches will improve functionality, add remaining bits as well as
address several TODOs.

Please note, the "PIO handling" TODO is expected to left unaddressed
for the current series. It is not an big issue for now while Xen
doesn't have support for vPCI on Arm. On Arm64 they are only used
for PCI IO Bar and we would probably want to expose them to emulator
as PIO access to make a DM completely arch-agnostic. So "PIO handling"
should be implemented when we add support for vPCI.

Please note, at the moment the build on Arm32 is broken (see the cmpxchg
usage in hvm_send_buffered_ioreq()) due to the lack of cmpxchg_64
support on Arm32. There is a patch on review to address this issue:
https://patchwork.kernel.org/patch/11715559/

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>

---
Please note, this is a split/cleanup/hardening of Julien's PoC:
"Add support for Guest IO forwarding to a device emulator"

Changes RFC -> V1:
   - was split into:
     - arm/ioreq: Introduce arch specific bits for IOREQ/DM features
     - xen/mm: Handle properly reference in set_foreign_p2m_entry() on Arm
   - update patch description
   - update asm-arm/hvm/ioreq.h according to the newly introduced arch functions:
     - arch_hvm_destroy_ioreq_server()
     - arch_handle_hvm_io_completion()
   - update arch files to include xen/ioreq.h
   - remove HVMOP plumbing
   - rewrite the logic to properly handle the case when hvm_send_ioreq() returns IO_RETRY
   - add logic to properly handle the handle_hvm_io_completion() return value
   - rename handle_mmio() to ioreq_handle_complete_mmio()
   - move paging_mark_pfn_dirty() to asm-arm/paging.h
   - remove forward declaration for hvm_ioreq_server in asm-arm/paging.h
   - move try_fwd_ioserv() to ioreq.c, provide stubs if !CONFIG_IOREQ_SERVER
   - do not remove #ifdef CONFIG_IOREQ_SERVER in memory.c for guarding xen/ioreq.h
   - use gdprintk in try_fwd_ioserv(), remove unneeded prints
   - update list of #include-s
   - move has_vpci() to asm-arm/domain.h
   - add a comment (TODO) to unimplemented yet handle_pio()
   - remove hvm_mmio_first(last)_byte() and hvm_ioreq_(page/vcpu/server) structs
     from the arch files, they were already moved to the common code
   - remove set_foreign_p2m_entry() changes, they will be properly implemented
     in the follow-up patch
   - select IOREQ_SERVER for Arm instead of Arm64 in Kconfig
   - remove x86's realmode and other unneeded stubs from xen/ioreq.h
   - clarify ioreq_t p.df usage in try_fwd_ioserv()
   - set ioreq_t p.count to 1 in try_fwd_ioserv()
---
---
 xen/arch/arm/Kconfig            |   1 +
 xen/arch/arm/Makefile           |   2 +
 xen/arch/arm/dm.c               |  33 ++++++++++
 xen/arch/arm/domain.c           |   9 +++
 xen/arch/arm/io.c               |  11 +++-
 xen/arch/arm/ioreq.c            | 142 ++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/traps.c            |  32 +++++++--
 xen/include/asm-arm/domain.h    |  46 +++++++++++++
 xen/include/asm-arm/hvm/ioreq.h | 108 ++++++++++++++++++++++++++++++
 xen/include/asm-arm/mmio.h      |   1 +
 xen/include/asm-arm/paging.h    |   4 ++
 11 files changed, 384 insertions(+), 5 deletions(-)
 create mode 100644 xen/arch/arm/dm.c
 create mode 100644 xen/arch/arm/ioreq.c
 create mode 100644 xen/include/asm-arm/hvm/ioreq.h

Comments

Oleksandr Tyshchenko Sept. 11, 2020, 10:14 a.m. UTC | #1
Hello all.


>   /*
> diff --git a/xen/include/asm-arm/hvm/ioreq.h b/xen/include/asm-arm/hvm/ioreq.h
> new file mode 100644
> index 0000000..1c34df0
> --- /dev/null
> +++ b/xen/include/asm-arm/hvm/ioreq.h
> @@ -0,0 +1,108 @@
> +/*
> + * hvm.h: Hardware virtual machine assist interface definitions.
> + *
> + * Copyright (c) 2016 Citrix Systems Inc.
> + * Copyright (c) 2019 Arm ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef __ASM_ARM_HVM_IOREQ_H__
> +#define __ASM_ARM_HVM_IOREQ_H__
> +
> +#include <public/hvm/ioreq.h>
> +#include <public/hvm/dm_op.h>
> +
> +#ifdef CONFIG_IOREQ_SERVER
> +enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v);
> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
> +                             struct vcpu *v, mmio_info_t *info);
> +#else
> +static inline enum io_state handle_ioserv(struct cpu_user_regs *regs,
> +                                          struct vcpu *v)
> +{
> +    return IO_UNHANDLED;
> +}
> +
> +static inline enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
> +                                           struct vcpu *v, mmio_info_t *info)
> +{
> +    return IO_UNHANDLED;
> +}
> +#endif
> +
> +bool ioreq_handle_complete_mmio(void);
> +
> +static inline bool handle_pio(uint16_t port, unsigned int size, int dir)
> +{
> +    /*
> +     * TODO: For Arm64, the main user will be PCI. So this should be
> +     * implemented when we add support for vPCI.
> +     */
> +    BUG();
> +    return true;
> +}
> +
> +static inline int arch_hvm_destroy_ioreq_server(struct hvm_ioreq_server *s)
> +{
> +    return 0;
> +}
> +
> +static inline void msix_write_completion(struct vcpu *v)
> +{
> +}
> +
> +static inline bool arch_handle_hvm_io_completion(
> +    enum hvm_io_completion io_completion)
> +{
> +    ASSERT_UNREACHABLE();

I am sorry, but there should be a "return true;" here
to avoid "no return statement in function returning non-void
[-Werror=return-type]".
I am a little bit puzzled why I didn't spot this build error earlier.


> +}
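
With that, the stub would read as follows (a sketch, just adding the
missing return):

static inline bool arch_handle_hvm_io_completion(
    enum hvm_io_completion io_completion)
{
    ASSERT_UNREACHABLE();
    return true;
}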
Jan Beulich Sept. 16, 2020, 7:51 a.m. UTC | #2
On 10.09.2020 22:22, Oleksandr Tyshchenko wrote:
> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>  {
>      local_irq_disable();
>  
> -    check_for_vcpu_work();
> -    check_for_pcpu_work();
> +    do
> +    {
> +        check_for_pcpu_work();
> +    } while ( check_for_vcpu_work() );

Looking at patch 11 I've stumbled across changes done to code
related to this, and now I wonder: There's no mention in the
description of why this is safe (i.e. not a potentially unbounded
loop).

As a nit - the opening brace does not belong on its own line in
this specific case.

Jan
Oleksandr Tyshchenko Sept. 22, 2020, 5:12 p.m. UTC | #3
On 16.09.20 10:51, Jan Beulich wrote:

Hi Jan

> On 10.09.2020 22:22, Oleksandr Tyshchenko wrote:
>> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>>   {
>>       local_irq_disable();
>>   
>> -    check_for_vcpu_work();
>> -    check_for_pcpu_work();
>> +    do
>> +    {
>> +        check_for_pcpu_work();
>> +    } while ( check_for_vcpu_work() );
> Looking at patch 11 I've stumbled across changes done to code
> related to this, and now I wonder: There's no mention in the
> description of why this is safe (i.e. not a potentially unbounded
> loop).
Indeed there was a discussion regarding that. Probably I should have 
added an explanation.
Please see the thoughts about that, and the reason why it was decided to
let the vCPU spin forever if the I/O never completes
(check_for_vcpu_work() never returns false) and why this was considered a
safe action:
https://patchwork.kernel.org/patch/11698549/#23540209


>
> As a nit - the opening brace does not belong on its own line in
> this specific case.
ok
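
I.e. the loop will then read:

do {
    check_for_pcpu_work();
} while ( check_for_vcpu_work() );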
Julien Grall Sept. 23, 2020, 6:03 p.m. UTC | #4
Hi,

On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>

I believe I am the original author of this code...
> This patch adds basic IOREQ/DM support on Arm. The subsequent
> patches will improve functionality, add remaining bits as well as
> address several TODOs.

I find it a bit weird to add code with TODOs that are handled in the same
series. Can't we just split this patch into smaller ones where everything
is addressed from the start?

> 
> Please note, the "PIO handling" TODO is expected to left unaddressed
> for the current series. It is not an big issue for now while Xen
> doesn't have support for vPCI on Arm. On Arm64 they are only used
> for PCI IO Bar and we would probably want to expose them to emulator
> as PIO access to make a DM completely arch-agnostic. So "PIO handling"
> should be implemented when we add support for vPCI.
> 
> Please note, at the moment build on Arm32 is broken (see cmpxchg
> usage in hvm_send_buffered_ioreq()) due to the lack of cmpxchg_64
> support on Arm32. There is a patch on review to address this issue:
> https://patchwork.kernel.org/patch/11715559/

This has been committed.

> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>
> ---
> ---
>   xen/arch/arm/Kconfig            |   1 +
>   xen/arch/arm/Makefile           |   2 +
>   xen/arch/arm/dm.c               |  33 ++++++++++
>   xen/arch/arm/domain.c           |   9 +++
>   xen/arch/arm/io.c               |  11 +++-
>   xen/arch/arm/ioreq.c            | 142 ++++++++++++++++++++++++++++++++++++++++
>   xen/arch/arm/traps.c            |  32 +++++++--
>   xen/include/asm-arm/domain.h    |  46 +++++++++++++
>   xen/include/asm-arm/hvm/ioreq.h | 108 ++++++++++++++++++++++++++++++
>   xen/include/asm-arm/mmio.h      |   1 +
>   xen/include/asm-arm/paging.h    |   4 ++
>   11 files changed, 384 insertions(+), 5 deletions(-)
>   create mode 100644 xen/arch/arm/dm.c
>   create mode 100644 xen/arch/arm/ioreq.c
>   create mode 100644 xen/include/asm-arm/hvm/ioreq.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 2777388..8264cd6 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -21,6 +21,7 @@ config ARM
>   	select HAS_PASSTHROUGH
>   	select HAS_PDX
>   	select IOMMU_FORCE_PT_SHARE
> +	select IOREQ_SERVER

I would prefer if IOREQ_SERVER is not included in the default build of 
Xen. This is a fairly big feature that requires a lot more testing.

>   
>   config ARCH_DEFCONFIG
>   	string
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 7e82b21..617fa3e 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -13,6 +13,7 @@ obj-y += cpuerrata.o
>   obj-y += cpufeature.o
>   obj-y += decode.o
>   obj-y += device.o
> +obj-$(CONFIG_IOREQ_SERVER) += dm.o
>   obj-y += domain.o
>   obj-y += domain_build.init.o
>   obj-y += domctl.o
> @@ -27,6 +28,7 @@ obj-y += guest_atomics.o
>   obj-y += guest_walk.o
>   obj-y += hvm.o
>   obj-y += io.o
> +obj-$(CONFIG_IOREQ_SERVER) += ioreq.o
>   obj-y += irq.o
>   obj-y += kernel.init.o
>   obj-$(CONFIG_LIVEPATCH) += livepatch.o
> diff --git a/xen/arch/arm/dm.c b/xen/arch/arm/dm.c
> new file mode 100644
> index 0000000..eb20344
> --- /dev/null
> +++ b/xen/arch/arm/dm.c
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright (c) 2019 Arm ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/hypercall.h>
> +
> +int arch_dm_op(struct xen_dm_op *op, struct domain *d,
> +               const struct dmop_args *op_args, bool *const_op)
> +{
> +    return -EOPNOTSUPP;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 3116932..043db3f 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -14,6 +14,7 @@
>   #include <xen/grant_table.h>
>   #include <xen/hypercall.h>
>   #include <xen/init.h>
> +#include <xen/ioreq.h>
>   #include <xen/lib.h>
>   #include <xen/livepatch.h>
>   #include <xen/sched.h>
> @@ -681,6 +682,10 @@ int arch_domain_create(struct domain *d,
>   
>       ASSERT(config != NULL);
>   
> +#ifdef CONFIG_IOREQ_SERVER
> +    hvm_ioreq_init(d);
> +#endif
> +
>       /* p2m_init relies on some value initialized by the IOMMU subsystem */
>       if ( (rc = iommu_domain_init(d, config->iommu_opts)) != 0 )
>           goto fail;
> @@ -999,6 +1004,10 @@ int domain_relinquish_resources(struct domain *d)
>           if (ret )
>               return ret;
>   
> +#ifdef CONFIG_IOREQ_SERVER
> +        hvm_destroy_all_ioreq_servers(d);
> +#endif
> +
>       PROGRESS(xen):
>           ret = relinquish_memory(d, &d->xenpage_list);
>           if ( ret )
> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
> index ae7ef96..adc9de7 100644
> --- a/xen/arch/arm/io.c
> +++ b/xen/arch/arm/io.c
> @@ -16,6 +16,7 @@
>    * GNU General Public License for more details.
>    */
>   
> +#include <xen/ioreq.h>
>   #include <xen/lib.h>
>   #include <xen/spinlock.h>
>   #include <xen/sched.h>
> @@ -123,7 +124,15 @@ enum io_state try_handle_mmio(struct cpu_user_regs *regs,
>   
>       handler = find_mmio_handler(v->domain, info.gpa);
>       if ( !handler )
> -        return IO_UNHANDLED;
> +    {
> +        int rc;
> +
> +        rc = try_fwd_ioserv(regs, v, &info);
> +        if ( rc == IO_HANDLED )
> +            return handle_ioserv(regs, v);
> +
> +        return rc;
> +    }
>   
>       /* All the instructions used on emulated MMIO region should be valid */
>       if ( !dabt.valid )
> diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c
> new file mode 100644
> index 0000000..e493c5b
> --- /dev/null
> +++ b/xen/arch/arm/ioreq.c
> @@ -0,0 +1,142 @@
> +/*
> + * arm/ioreq.c: hardware virtual machine I/O emulation
> + *
> + * Copyright (c) 2019 Arm ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/domain.h>
> +#include <xen/ioreq.h>
> +
> +#include <public/hvm/ioreq.h>
> +
> +#include <asm/traps.h>
> +
> +enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v)
> +{
> +    const union hsr hsr = { .bits = regs->hsr };
> +    const struct hsr_dabt dabt = hsr.dabt;
> +    /* Code is similar to handle_read */
> +    uint8_t size = (1 << dabt.size) * 8;
> +    register_t r = v->arch.hvm.hvm_io.io_req.data;
> +
> +    /* We are done with the IO */
> +    v->arch.hvm.hvm_io.io_req.state = STATE_IOREQ_NONE;
> +
> +    /* XXX: Do we need to take care of write here ? */

It doesn't look like we need to do anything for writes as they have 
completed. Is there anything else we need to confirm?

> +    if ( dabt.write )
> +        return IO_HANDLED;
> +
> +    /*
> +     * Sign extend if required.
> +     * Note that we expect the read handler to have zeroed the bits
> +     * outside the requested access size.
> +     */
> +    if ( dabt.sign && (r & (1UL << (size - 1))) )
> +    {
> +        /*
> +         * We are relying on register_t using the same as
> +         * an unsigned long in order to keep the 32-bit assembly
> +         * code smaller.
> +         */
> +        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
> +        r |= (~0UL) << size;
> +    }
> +
> +    set_user_reg(regs, dabt.reg, r);
> +
> +    return IO_HANDLED;
> +}
> +
> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
> +                             struct vcpu *v, mmio_info_t *info)
> +{
> +    struct hvm_vcpu_io *vio = &v->arch.hvm.hvm_io;
> +    ioreq_t p = {
> +        .type = IOREQ_TYPE_COPY,
> +        .addr = info->gpa,
> +        .size = 1 << info->dabt.size,
> +        .count = 1,
> +        .dir = !info->dabt.write,
> +        /*
> +         * On x86, df is used by 'rep' instruction to tell the direction
> +         * to iterate (forward or backward).
> +         * On Arm, all the accesses to MMIO region will do a single
> +         * memory access. So for now, we can safely always set to 0.
> +         */
> +        .df = 0,
> +        .data = get_user_reg(regs, info->dabt.reg),
> +        .state = STATE_IOREQ_READY,
> +    };
> +    struct hvm_ioreq_server *s = NULL;
> +    enum io_state rc;
> +
> +    switch ( vio->io_req.state )
> +    {
> +    case STATE_IOREQ_NONE:
> +        break;
> +
> +    case STATE_IORESP_READY:
> +        return IO_HANDLED;
> +
> +    default:
> +        gdprintk(XENLOG_ERR, "wrong state %u\n", vio->io_req.state);
> +        return IO_ABORT;
> +    }
> +
> +    s = hvm_select_ioreq_server(v->domain, &p);
> +    if ( !s )
> +        return IO_UNHANDLED;
> +
> +    if ( !info->dabt.valid )
> +        return IO_ABORT;
> +
> +    vio->io_req = p;
> +
> +    rc = hvm_send_ioreq(s, &p, 0);
> +    if ( rc != IO_RETRY || v->domain->is_shutting_down )
> +        vio->io_req.state = STATE_IOREQ_NONE;
> +    else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
> +        rc = IO_HANDLED;
> +    else
> +        vio->io_completion = HVMIO_mmio_completion;
> +
> +    return rc;
> +}
> +
> +bool ioreq_handle_complete_mmio(void)
> +{
> +    struct vcpu *v = current;
> +    struct cpu_user_regs *regs = guest_cpu_user_regs();
> +    const union hsr hsr = { .bits = regs->hsr };
> +    paddr_t addr = v->arch.hvm.hvm_io.io_req.addr;
> +
> +    if ( try_handle_mmio(regs, hsr, addr) == IO_HANDLED )
> +    {
> +        advance_pc(regs, hsr);
> +        return true;
> +    }
> +
> +    return false;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index 8f40d0e..121942c 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -21,6 +21,7 @@
>   #include <xen/hypercall.h>
>   #include <xen/init.h>
>   #include <xen/iocap.h>
> +#include <xen/ioreq.h>
>   #include <xen/irq.h>
>   #include <xen/lib.h>
>   #include <xen/mem_access.h>
> @@ -1384,6 +1385,9 @@ static arm_hypercall_t arm_hypercall_table[] = {
>   #ifdef CONFIG_HYPFS
>       HYPERCALL(hypfs_op, 5),
>   #endif
> +#ifdef CONFIG_IOREQ_SERVER
> +    HYPERCALL(dm_op, 3),
> +#endif
>   };
>   
>   #ifndef NDEBUG
> @@ -1955,9 +1959,14 @@ static void do_trap_stage2_abort_guest(struct cpu_user_regs *regs,
>               case IO_HANDLED:
>                   advance_pc(regs, hsr);
>                   return;
> +            case IO_RETRY:
> +                /* finish later */
> +                return;
>               case IO_UNHANDLED:
>                   /* IO unhandled, try another way to handle it. */
>                   break;
> +            default:
> +                ASSERT_UNREACHABLE();
>               }
>           }
>   
> @@ -2249,12 +2258,23 @@ static void check_for_pcpu_work(void)
>    * Process pending work for the vCPU. Any call should be fast or
>    * implement preemption.
>    */
> -static void check_for_vcpu_work(void)
> +static bool check_for_vcpu_work(void)
>   {
>       struct vcpu *v = current;
>   
> +#ifdef CONFIG_IOREQ_SERVER
> +    bool handled;
> +
> +    local_irq_enable();
> +    handled = handle_hvm_io_completion(v);
> +    local_irq_disable();
> +
> +    if ( !handled )
> +        return true;
> +#endif
> +
>       if ( likely(!v->arch.need_flush_to_ram) )
> -        return;
> +        return false;
>   
>       /*
>        * Give a chance for the pCPU to process work before handling the vCPU
> @@ -2265,6 +2285,8 @@ static void check_for_vcpu_work(void)
>       local_irq_enable();
>       p2m_flush_vm(v);
>       local_irq_disable();
> +
> +    return false;
>   }
>   
>   /*
> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>   {
>       local_irq_disable();
>   
> -    check_for_vcpu_work();
> -    check_for_pcpu_work();
> +    do
> +    {
> +        check_for_pcpu_work();
> +    } while ( check_for_vcpu_work() );
>   
>       vgic_sync_to_lrs();
>   
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 6819a3b..d1c48d7 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -11,10 +11,27 @@
>   #include <asm/vgic.h>
>   #include <asm/vpl011.h>
>   #include <public/hvm/params.h>
> +#include <public/hvm/dm_op.h>
> +#include <public/hvm/ioreq.h>
> +
> +#define MAX_NR_IOREQ_SERVERS 8
>   
>   struct hvm_domain
>   {
>       uint64_t              params[HVM_NR_PARAMS];
> +
> +    /* Guest page range used for non-default ioreq servers */
> +    struct {
> +        unsigned long base;
> +        unsigned long mask;
> +        unsigned long legacy_mask; /* indexed by HVM param number */
> +    } ioreq_gfn;
> +
> +    /* Lock protects all other values in the sub-struct and the default */
> +    struct {
> +        spinlock_t              lock;
> +        struct hvm_ioreq_server *server[MAX_NR_IOREQ_SERVERS];
> +    } ioreq_server;
>   };
>   
>   #ifdef CONFIG_ARM_64
> @@ -91,6 +108,28 @@ struct arch_domain
>   #endif
>   }  __cacheline_aligned;
>   
> +enum hvm_io_completion {
> +    HVMIO_no_completion,
> +    HVMIO_mmio_completion,
> +    HVMIO_pio_completion
> +};
> +
> +struct hvm_vcpu_io {
> +    /* I/O request in flight to device model. */
> +    enum hvm_io_completion io_completion;
> +    ioreq_t                io_req;
> +
> +    /*
> +     * HVM emulation:
> +     *  Linear address @mmio_gla maps to MMIO physical frame @mmio_gpfn.
> +     *  The latter is known to be an MMIO frame (not RAM).
> +     *  This translation is only valid for accesses as per @mmio_access.
> +     */
> +    struct npfec        mmio_access;
> +    unsigned long       mmio_gla;
> +    unsigned long       mmio_gpfn;
> +};
> +

Why do we need to re-define most of this? Can't this just be in common code?

I would also rather not define them if CONFIG_IOREQ_SERVER is not set.


>   struct arch_vcpu
>   {
>       struct {
> @@ -204,6 +243,11 @@ struct arch_vcpu
>        */
>       bool need_flush_to_ram;
>   
> +    struct hvm_vcpu
> +    {
> +        struct hvm_vcpu_io hvm_io;
> +    } hvm;

The IOREQ code is meant to be agnostic to the type of guest, so I 
don't really see a reason for the common code to access arch.hvm.

This should be abstracted appropriately.

> +
>   }  __cacheline_aligned;
>   
>   void vcpu_show_execution_state(struct vcpu *);
> @@ -262,6 +306,8 @@ static inline void arch_vcpu_block(struct vcpu *v) {}
>   
>   #define arch_vm_assist_valid_mask(d) (1UL << VMASST_TYPE_runstate_update_flag)
>   
> +#define has_vpci(d)    ({ (void)(d); false; })
> +
>   #endif /* __ASM_DOMAIN_H__ */
>   
>   /*
> diff --git a/xen/include/asm-arm/hvm/ioreq.h b/xen/include/asm-arm/hvm/ioreq.h
> new file mode 100644
> index 0000000..1c34df0
> --- /dev/null
> +++ b/xen/include/asm-arm/hvm/ioreq.h
> @@ -0,0 +1,108 @@
> +/*
> + * hvm.h: Hardware virtual machine assist interface definitions.
> + *
> + * Copyright (c) 2016 Citrix Systems Inc.
> + * Copyright (c) 2019 Arm ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef __ASM_ARM_HVM_IOREQ_H__
> +#define __ASM_ARM_HVM_IOREQ_H__
> +
> +#include <public/hvm/ioreq.h>
> +#include <public/hvm/dm_op.h>
> +
> +#ifdef CONFIG_IOREQ_SERVER
> +enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v);
> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
> +                             struct vcpu *v, mmio_info_t *info);
> +#else
> +static inline enum io_state handle_ioserv(struct cpu_user_regs *regs,
> +                                          struct vcpu *v)
> +{
> +    return IO_UNHANDLED;
> +}
> +
> +static inline enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
> +                                           struct vcpu *v, mmio_info_t *info)
> +{
> +    return IO_UNHANDLED;
> +}
> +#endif
> +
> +bool ioreq_handle_complete_mmio(void);
> +
> +static inline bool handle_pio(uint16_t port, unsigned int size, int dir)
> +{
> +    /*
> +     * TODO: For Arm64, the main user will be PCI. So this should be
> +     * implemented when we add support for vPCI.
> +     */
> +    BUG();

Why do you use a BUG() and not an ASSERT_UNREACHABLE()?

> +    return true;
> +}
> +
> +static inline int arch_hvm_destroy_ioreq_server(struct hvm_ioreq_server *s)
> +{
> +    return 0;
> +}
> +
> +static inline void msix_write_completion(struct vcpu *v)
> +{
> +}
> +
> +static inline bool arch_handle_hvm_io_completion(
> +    enum hvm_io_completion io_completion)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
> +static inline int hvm_get_ioreq_server_range_type(struct domain *d,
> +                                                  ioreq_t *p,
> +                                                  uint8_t *type,
> +                                                  uint64_t *addr)
> +{
> +    if ( p->type != IOREQ_TYPE_COPY && p->type != IOREQ_TYPE_PIO )
> +        return -EINVAL;
> +
> +    *type = (p->type == IOREQ_TYPE_PIO) ?
> +             XEN_DMOP_IO_RANGE_PORT : XEN_DMOP_IO_RANGE_MEMORY;
> +    *addr = p->addr;
> +
> +    return 0;
> +}
> +
> +static inline void arch_hvm_ioreq_init(struct domain *d)
> +{
> +}
> +
> +static inline void arch_hvm_ioreq_destroy(struct domain *d)
> +{
> +}
> +
> +#define IOREQ_IO_HANDLED     IO_HANDLED
> +#define IOREQ_IO_UNHANDLED   IO_UNHANDLED
> +#define IOREQ_IO_RETRY       IO_RETRY
> +
> +#endif /* __ASM_ARM_HVM_IOREQ_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-arm/mmio.h b/xen/include/asm-arm/mmio.h
> index 8dbfb27..7ab873c 100644
> --- a/xen/include/asm-arm/mmio.h
> +++ b/xen/include/asm-arm/mmio.h
> @@ -37,6 +37,7 @@ enum io_state
>       IO_ABORT,       /* The IO was handled by the helper and led to an abort. */
>       IO_HANDLED,     /* The IO was successfully handled by the helper. */
>       IO_UNHANDLED,   /* The IO was not handled by the helper. */
> +    IO_RETRY,       /* Retry the emulation for some reason */
>   };
>   
>   typedef int (*mmio_read_t)(struct vcpu *v, mmio_info_t *info,
> diff --git a/xen/include/asm-arm/paging.h b/xen/include/asm-arm/paging.h
> index 6d1a000..0550c55 100644
> --- a/xen/include/asm-arm/paging.h
> +++ b/xen/include/asm-arm/paging.h
> @@ -4,6 +4,10 @@
>   #define paging_mode_translate(d)              (1)
>   #define paging_mode_external(d)               (1)
>   
> +static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
> +{
> +}
> +
>   #endif /* XEN_PAGING_H */
>   
>   /*
> 

Cheers,
Oleksandr Tyshchenko Sept. 23, 2020, 8:16 p.m. UTC | #5
On 23.09.20 21:03, Julien Grall wrote:
> Hi,

Hi Julien


>
> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>
> I believe I am the original author of this code...

Sorry, will fix


>
>> This patch adds basic IOREQ/DM support on Arm. The subsequent
>> patches will improve functionality, add remaining bits as well as
>> address several TODOs.
>
> I find it a bit weird to add code with TODOs that are handled in the same
> series. Can't we just split this patch into smaller ones where everything
> is addressed from the start?
Sorry if I wasn't clear in the description. Let me please clarify.
The corresponding RFC patch had 3 major TODOs:
1. Handle properly when hvm_send_ioreq() returns IO_RETRY
2. Proper ref-counting for the foreign entries in set_foreign_p2m_entry()
3. Check the return value of handle_hvm_io_completion() *and* avoid 
calling handle_hvm_io_completion() on every return.

TODO #1 was fixed in the current patch.
TODO #2 was fixed in "xen/mm: Handle properly reference in
set_foreign_p2m_entry() on Arm".
TODO #3 was partially fixed in the current patch (check the return value
of handle_hvm_io_completion()).
The second part of TODO #3 (avoid calling handle_hvm_io_completion() on
every return) was moved to a separate patch "xen/ioreq: Introduce
hvm_domain_has_ioreq_server()" and fixed (or probably improved is a
better word) there, along with introducing a mechanism that makes the
improvement possible.

Could you please clarify how this patch could be split into smaller ones?


>
>
>>
>> Please note, the "PIO handling" TODO is expected to left unaddressed
>> for the current series. It is not an big issue for now while Xen
>> doesn't have support for vPCI on Arm. On Arm64 they are only used
>> for PCI IO Bar and we would probably want to expose them to emulator
>> as PIO access to make a DM completely arch-agnostic. So "PIO handling"
>> should be implemented when we add support for vPCI.
>>
>> Please note, at the moment build on Arm32 is broken (see cmpxchg
>> usage in hvm_send_buffered_ioreq()) due to the lack of cmpxchg_64
>> support on Arm32. There is a patch on review to address this issue:
>> https://patchwork.kernel.org/patch/11715559/
>
> This has been committed.

Thank you for the patch, will remove the notice.


>
>>
>> Signed-off-by: Julien Grall <julien.grall@arm.com>
>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>
>> ---
>> ---
>>   xen/arch/arm/Kconfig            |   1 +
>>   xen/arch/arm/Makefile           |   2 +
>>   xen/arch/arm/dm.c               |  33 ++++++++++
>>   xen/arch/arm/domain.c           |   9 +++
>>   xen/arch/arm/io.c               |  11 +++-
>>   xen/arch/arm/ioreq.c            | 142 
>> ++++++++++++++++++++++++++++++++++++++++
>>   xen/arch/arm/traps.c            |  32 +++++++--
>>   xen/include/asm-arm/domain.h    |  46 +++++++++++++
>>   xen/include/asm-arm/hvm/ioreq.h | 108 ++++++++++++++++++++++++++++++
>>   xen/include/asm-arm/mmio.h      |   1 +
>>   xen/include/asm-arm/paging.h    |   4 ++
>>   11 files changed, 384 insertions(+), 5 deletions(-)
>>   create mode 100644 xen/arch/arm/dm.c
>>   create mode 100644 xen/arch/arm/ioreq.c
>>   create mode 100644 xen/include/asm-arm/hvm/ioreq.h
>>
>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>> index 2777388..8264cd6 100644
>> --- a/xen/arch/arm/Kconfig
>> +++ b/xen/arch/arm/Kconfig
>> @@ -21,6 +21,7 @@ config ARM
>>       select HAS_PASSTHROUGH
>>       select HAS_PDX
>>       select IOMMU_FORCE_PT_SHARE
>> +    select IOREQ_SERVER
>
> I would prefer if IOREQ_SERVER is not included in the default build of 
> Xen. This is a fairly big feature that requires a lot more testing.

Sounds reasonable. Will remove.


>
>
>>     config ARCH_DEFCONFIG
>>       string
>> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
>> index 7e82b21..617fa3e 100644
>> --- a/xen/arch/arm/Makefile
>> +++ b/xen/arch/arm/Makefile
>> @@ -13,6 +13,7 @@ obj-y += cpuerrata.o
>>   obj-y += cpufeature.o
>>   obj-y += decode.o
>>   obj-y += device.o
>> +obj-$(CONFIG_IOREQ_SERVER) += dm.o
>>   obj-y += domain.o
>>   obj-y += domain_build.init.o
>>   obj-y += domctl.o
>> @@ -27,6 +28,7 @@ obj-y += guest_atomics.o
>>   obj-y += guest_walk.o
>>   obj-y += hvm.o
>>   obj-y += io.o
>> +obj-$(CONFIG_IOREQ_SERVER) += ioreq.o
>>   obj-y += irq.o
>>   obj-y += kernel.init.o
>>   obj-$(CONFIG_LIVEPATCH) += livepatch.o
>> diff --git a/xen/arch/arm/dm.c b/xen/arch/arm/dm.c
>> new file mode 100644
>> index 0000000..eb20344
>> --- /dev/null
>> +++ b/xen/arch/arm/dm.c
>> @@ -0,0 +1,33 @@
>> +/*
>> + * Copyright (c) 2019 Arm ltd.
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but 
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License 
>> along with
>> + * this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/hypercall.h>
>> +
>> +int arch_dm_op(struct xen_dm_op *op, struct domain *d,
>> +               const struct dmop_args *op_args, bool *const_op)
>> +{
>> +    return -EOPNOTSUPP;
>> +}
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * tab-width: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>> index 3116932..043db3f 100644
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -14,6 +14,7 @@
>>   #include <xen/grant_table.h>
>>   #include <xen/hypercall.h>
>>   #include <xen/init.h>
>> +#include <xen/ioreq.h>
>>   #include <xen/lib.h>
>>   #include <xen/livepatch.h>
>>   #include <xen/sched.h>
>> @@ -681,6 +682,10 @@ int arch_domain_create(struct domain *d,
>>         ASSERT(config != NULL);
>>   +#ifdef CONFIG_IOREQ_SERVER
>> +    hvm_ioreq_init(d);
>> +#endif
>> +
>>       /* p2m_init relies on some value initialized by the IOMMU 
>> subsystem */
>>       if ( (rc = iommu_domain_init(d, config->iommu_opts)) != 0 )
>>           goto fail;
>> @@ -999,6 +1004,10 @@ int domain_relinquish_resources(struct domain *d)
>>           if (ret )
>>               return ret;
>>   +#ifdef CONFIG_IOREQ_SERVER
>> +        hvm_destroy_all_ioreq_servers(d);
>> +#endif
>> +
>>       PROGRESS(xen):
>>           ret = relinquish_memory(d, &d->xenpage_list);
>>           if ( ret )
>> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
>> index ae7ef96..adc9de7 100644
>> --- a/xen/arch/arm/io.c
>> +++ b/xen/arch/arm/io.c
>> @@ -16,6 +16,7 @@
>>    * GNU General Public License for more details.
>>    */
>>   +#include <xen/ioreq.h>
>>   #include <xen/lib.h>
>>   #include <xen/spinlock.h>
>>   #include <xen/sched.h>
>> @@ -123,7 +124,15 @@ enum io_state try_handle_mmio(struct 
>> cpu_user_regs *regs,
>>         handler = find_mmio_handler(v->domain, info.gpa);
>>       if ( !handler )
>> -        return IO_UNHANDLED;
>> +    {
>> +        int rc;
>> +
>> +        rc = try_fwd_ioserv(regs, v, &info);
>> +        if ( rc == IO_HANDLED )
>> +            return handle_ioserv(regs, v);
>> +
>> +        return rc;
>> +    }
>>         /* All the instructions used on emulated MMIO region should 
>> be valid */
>>       if ( !dabt.valid )
>> diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c
>> new file mode 100644
>> index 0000000..e493c5b
>> --- /dev/null
>> +++ b/xen/arch/arm/ioreq.c
>> @@ -0,0 +1,142 @@
>> +/*
>> + * arm/ioreq.c: hardware virtual machine I/O emulation
>> + *
>> + * Copyright (c) 2019 Arm ltd.
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but 
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License 
>> along with
>> + * this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/domain.h>
>> +#include <xen/ioreq.h>
>> +
>> +#include <public/hvm/ioreq.h>
>> +
>> +#include <asm/traps.h>
>> +
>> +enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v)
>> +{
>> +    const union hsr hsr = { .bits = regs->hsr };
>> +    const struct hsr_dabt dabt = hsr.dabt;
>> +    /* Code is similar to handle_read */
>> +    uint8_t size = (1 << dabt.size) * 8;
>> +    register_t r = v->arch.hvm.hvm_io.io_req.data;
>> +
>> +    /* We are done with the IO */
>> +    v->arch.hvm.hvm_io.io_req.state = STATE_IOREQ_NONE;
>> +
>> +    /* XXX: Do we need to take care of write here ? */
>
> It doesn't look like we need to do anything for writes as they have 
> completed. Is there anything else we need to confirm?

Agreed, it was discussed during the RFC series. I forgot to remove it.


>
>
>> +    if ( dabt.write )
>> +        return IO_HANDLED;
>> +
>> +    /*
>> +     * Sign extend if required.
>> +     * Note that we expect the read handler to have zeroed the bits
>> +     * outside the requested access size.
>> +     */
>> +    if ( dabt.sign && (r & (1UL << (size - 1))) )
>> +    {
>> +        /*
>> +         * We are relying on register_t using the same as
>> +         * an unsigned long in order to keep the 32-bit assembly
>> +         * code smaller.
>> +         */
>> +        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
>> +        r |= (~0UL) << size;
>> +    }
>> +
>> +    set_user_reg(regs, dabt.reg, r);
>> +
>> +    return IO_HANDLED;
>> +}
>> +
>> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
>> +                             struct vcpu *v, mmio_info_t *info)
>> +{
>> +    struct hvm_vcpu_io *vio = &v->arch.hvm.hvm_io;
>> +    ioreq_t p = {
>> +        .type = IOREQ_TYPE_COPY,
>> +        .addr = info->gpa,
>> +        .size = 1 << info->dabt.size,
>> +        .count = 1,
>> +        .dir = !info->dabt.write,
>> +        /*
>> +         * On x86, df is used by 'rep' instruction to tell the 
>> direction
>> +         * to iterate (forward or backward).
>> +         * On Arm, all the accesses to MMIO region will do a single
>> +         * memory access. So for now, we can safely always set to 0.
>> +         */
>> +        .df = 0,
>> +        .data = get_user_reg(regs, info->dabt.reg),
>> +        .state = STATE_IOREQ_READY,
>> +    };
>> +    struct hvm_ioreq_server *s = NULL;
>> +    enum io_state rc;
>> +
>> +    switch ( vio->io_req.state )
>> +    {
>> +    case STATE_IOREQ_NONE:
>> +        break;
>> +
>> +    case STATE_IORESP_READY:
>> +        return IO_HANDLED;
>> +
>> +    default:
>> +        gdprintk(XENLOG_ERR, "wrong state %u\n", vio->io_req.state);
>> +        return IO_ABORT;
>> +    }
>> +
>> +    s = hvm_select_ioreq_server(v->domain, &p);
>> +    if ( !s )
>> +        return IO_UNHANDLED;
>> +
>> +    if ( !info->dabt.valid )
>> +        return IO_ABORT;
>> +
>> +    vio->io_req = p;
>> +
>> +    rc = hvm_send_ioreq(s, &p, 0);
>> +    if ( rc != IO_RETRY || v->domain->is_shutting_down )
>> +        vio->io_req.state = STATE_IOREQ_NONE;
>> +    else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
>> +        rc = IO_HANDLED;
>> +    else
>> +        vio->io_completion = HVMIO_mmio_completion;
>> +
>> +    return rc;
>> +}
>> +
>> +bool ioreq_handle_complete_mmio(void)
>> +{
>> +    struct vcpu *v = current;
>> +    struct cpu_user_regs *regs = guest_cpu_user_regs();
>> +    const union hsr hsr = { .bits = regs->hsr };
>> +    paddr_t addr = v->arch.hvm.hvm_io.io_req.addr;
>> +
>> +    if ( try_handle_mmio(regs, hsr, addr) == IO_HANDLED )
>> +    {
>> +        advance_pc(regs, hsr);
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * tab-width: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>> index 8f40d0e..121942c 100644
>> --- a/xen/arch/arm/traps.c
>> +++ b/xen/arch/arm/traps.c
>> @@ -21,6 +21,7 @@
>>   #include <xen/hypercall.h>
>>   #include <xen/init.h>
>>   #include <xen/iocap.h>
>> +#include <xen/ioreq.h>
>>   #include <xen/irq.h>
>>   #include <xen/lib.h>
>>   #include <xen/mem_access.h>
>> @@ -1384,6 +1385,9 @@ static arm_hypercall_t arm_hypercall_table[] = {
>>   #ifdef CONFIG_HYPFS
>>       HYPERCALL(hypfs_op, 5),
>>   #endif
>> +#ifdef CONFIG_IOREQ_SERVER
>> +    HYPERCALL(dm_op, 3),
>> +#endif
>>   };
>>     #ifndef NDEBUG
>> @@ -1955,9 +1959,14 @@ static void do_trap_stage2_abort_guest(struct 
>> cpu_user_regs *regs,
>>               case IO_HANDLED:
>>                   advance_pc(regs, hsr);
>>                   return;
>> +            case IO_RETRY:
>> +                /* finish later */
>> +                return;
>>               case IO_UNHANDLED:
>>                   /* IO unhandled, try another way to handle it. */
>>                   break;
>> +            default:
>> +                ASSERT_UNREACHABLE();
>>               }
>>           }
>>   @@ -2249,12 +2258,23 @@ static void check_for_pcpu_work(void)
>>    * Process pending work for the vCPU. Any call should be fast or
>>    * implement preemption.
>>    */
>> -static void check_for_vcpu_work(void)
>> +static bool check_for_vcpu_work(void)
>>   {
>>       struct vcpu *v = current;
>>   +#ifdef CONFIG_IOREQ_SERVER
>> +    bool handled;
>> +
>> +    local_irq_enable();
>> +    handled = handle_hvm_io_completion(v);
>> +    local_irq_disable();
>> +
>> +    if ( !handled )
>> +        return true;
>> +#endif
>> +
>>       if ( likely(!v->arch.need_flush_to_ram) )
>> -        return;
>> +        return false;
>>         /*
>>        * Give a chance for the pCPU to process work before handling 
>> the vCPU
>> @@ -2265,6 +2285,8 @@ static void check_for_vcpu_work(void)
>>       local_irq_enable();
>>       p2m_flush_vm(v);
>>       local_irq_disable();
>> +
>> +    return false;
>>   }
>>     /*
>> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>>   {
>>       local_irq_disable();
>>   -    check_for_vcpu_work();
>> -    check_for_pcpu_work();
>> +    do
>> +    {
>> +        check_for_pcpu_work();
>> +    } while ( check_for_vcpu_work() );
>>         vgic_sync_to_lrs();
>>   diff --git a/xen/include/asm-arm/domain.h 
>> b/xen/include/asm-arm/domain.h
>> index 6819a3b..d1c48d7 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -11,10 +11,27 @@
>>   #include <asm/vgic.h>
>>   #include <asm/vpl011.h>
>>   #include <public/hvm/params.h>
>> +#include <public/hvm/dm_op.h>
>> +#include <public/hvm/ioreq.h>
>> +
>> +#define MAX_NR_IOREQ_SERVERS 8
>>     struct hvm_domain
>>   {
>>       uint64_t              params[HVM_NR_PARAMS];
>> +
>> +    /* Guest page range used for non-default ioreq servers */
>> +    struct {
>> +        unsigned long base;
>> +        unsigned long mask;
>> +        unsigned long legacy_mask; /* indexed by HVM param number */
>> +    } ioreq_gfn;
>> +
>> +    /* Lock protects all other values in the sub-struct and the 
>> default */
>> +    struct {
>> +        spinlock_t              lock;
>> +        struct hvm_ioreq_server *server[MAX_NR_IOREQ_SERVERS];
>> +    } ioreq_server;
>>   };
>>     #ifdef CONFIG_ARM_64
>> @@ -91,6 +108,28 @@ struct arch_domain
>>   #endif
>>   }  __cacheline_aligned;
>>   +enum hvm_io_completion {
>> +    HVMIO_no_completion,
>> +    HVMIO_mmio_completion,
>> +    HVMIO_pio_completion
>> +};
>> +
>> +struct hvm_vcpu_io {
>> +    /* I/O request in flight to device model. */
>> +    enum hvm_io_completion io_completion;
>> +    ioreq_t                io_req;
>> +
>> +    /*
>> +     * HVM emulation:
>> +     *  Linear address @mmio_gla maps to MMIO physical frame 
>> @mmio_gpfn.
>> +     *  The latter is known to be an MMIO frame (not RAM).
>> +     *  This translation is only valid for accesses as per 
>> @mmio_access.
>> +     */
>> +    struct npfec        mmio_access;
>> +    unsigned long       mmio_gla;
>> +    unsigned long       mmio_gpfn;
>> +};
>> +
>
> Why do we need to re-define most of this? Can't this just be in common 
> code?

Jan asked almost the same question in "[PATCH V1 02/16] xen/ioreq: 
Make x86's IOREQ feature common".
Please see my answer there:
https://patchwork.kernel.org/patch/11769105/#23637511

Theoretically we could move this to the common code, but the question is
what to do with the other struct fields that x86's struct hvm_vcpu_io
has/needs but Arm's seemingly does not. Would it be possible to logically
split struct hvm_vcpu_io into common and arch parts?

struct hvm_vcpu_io {
     /* I/O request in flight to device model. */
     enum hvm_io_completion io_completion;
     ioreq_t                io_req;

     /*
      * HVM emulation:
      *  Linear address @mmio_gla maps to MMIO physical frame @mmio_gpfn.
      *  The latter is known to be an MMIO frame (not RAM).
      *  This translation is only valid for accesses as per @mmio_access.
      */
     struct npfec        mmio_access;
     unsigned long       mmio_gla;
     unsigned long       mmio_gpfn;

     /*
      * We may need to handle up to 3 distinct memory accesses per
      * instruction.
      */
     struct hvm_mmio_cache mmio_cache[3];
     unsigned int mmio_cache_count;

     /* For retries we shouldn't re-fetch the instruction. */
     unsigned int mmio_insn_bytes;
     unsigned char mmio_insn[16];
     struct hvmemul_cache *cache;

     /*
      * For string instruction emulation we need to be able to signal a
      * necessary retry through other than function return codes.
      */
     bool_t mmio_retry;

     unsigned long msix_unmask_address;
     unsigned long msix_snoop_address;
     unsigned long msix_snoop_gpa;

     const struct g2m_ioport *g2m_ioport;
};


>
> I would also rather not define them if CONFIG_IOREQ_SERVER is not set.

ok


>
>
>
>>   struct arch_vcpu
>>   {
>>       struct {
>> @@ -204,6 +243,11 @@ struct arch_vcpu
>>        */
>>       bool need_flush_to_ram;
>>   +    struct hvm_vcpu
>> +    {
>> +        struct hvm_vcpu_io hvm_io;
>> +    } hvm;
>
> The IOREQ code is meant to be agnostic to the type of guest, so I 
> don't really see a reason for the common code to access arch.hvm.
>
> This should be abstracted appropriately.
Yes, there is a discussion pending in "[PATCH V1 02/16] xen/ioreq: Make
x86's IOREQ feature common" about the layering violation issue.
There won't be any uses of *arch.hvm* left in the common ioreq code in
the next series. Everything containing *arch.hvm* in the common code will
be abstracted away.


>
>
>> +
>>   }  __cacheline_aligned;
>>     void vcpu_show_execution_state(struct vcpu *);
>> @@ -262,6 +306,8 @@ static inline void arch_vcpu_block(struct vcpu 
>> *v) {}
>>     #define arch_vm_assist_valid_mask(d) (1UL << 
>> VMASST_TYPE_runstate_update_flag)
>>   +#define has_vpci(d)    ({ (void)(d); false; })
>> +
>>   #endif /* __ASM_DOMAIN_H__ */
>>     /*
>> diff --git a/xen/include/asm-arm/hvm/ioreq.h 
>> b/xen/include/asm-arm/hvm/ioreq.h
>> new file mode 100644
>> index 0000000..1c34df0
>> --- /dev/null
>> +++ b/xen/include/asm-arm/hvm/ioreq.h
>> @@ -0,0 +1,108 @@
>> +/*
>> + * hvm.h: Hardware virtual machine assist interface definitions.
>> + *
>> + * Copyright (c) 2016 Citrix Systems Inc.
>> + * Copyright (c) 2019 Arm ltd.
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but 
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License 
>> along with
>> + * this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#ifndef __ASM_ARM_HVM_IOREQ_H__
>> +#define __ASM_ARM_HVM_IOREQ_H__
>> +
>> +#include <public/hvm/ioreq.h>
>> +#include <public/hvm/dm_op.h>
>> +
>> +#ifdef CONFIG_IOREQ_SERVER
>> +enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu 
>> *v);
>> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
>> +                             struct vcpu *v, mmio_info_t *info);
>> +#else
>> +static inline enum io_state handle_ioserv(struct cpu_user_regs *regs,
>> +                                          struct vcpu *v)
>> +{
>> +    return IO_UNHANDLED;
>> +}
>> +
>> +static inline enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
>> +                                           struct vcpu *v, 
>> mmio_info_t *info)
>> +{
>> +    return IO_UNHANDLED;
>> +}
>> +#endif
>> +
>> +bool ioreq_handle_complete_mmio(void);
>> +
>> +static inline bool handle_pio(uint16_t port, unsigned int size, int 
>> dir)
>> +{
>> +    /*
>> +     * TODO: For Arm64, the main user will be PCI. So this should be
>> +     * implemented when we add support for vPCI.
>> +     */
>> +    BUG();
>
> Why do you use a BUG() and not an ASSERT_UNREACHABLE()?

Yes, ASSERT_UNREACHABLE() is better suited here.
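So the stub would become:

static inline bool handle_pio(uint16_t port, unsigned int size, int dir)
{
    /*
     * TODO: For Arm64, the main user will be PCI. So this should be
     * implemented when we add support for vPCI.
     */
    ASSERT_UNREACHABLE();
    return true;
}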
Jan Beulich Sept. 24, 2020, 11:08 a.m. UTC | #6
On 23.09.2020 22:16, Oleksandr wrote:
> On 23.09.20 21:03, Julien Grall wrote:
>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>   #endif
>>>   }  __cacheline_aligned;
>>>   +enum hvm_io_completion {
>>> +    HVMIO_no_completion,
>>> +    HVMIO_mmio_completion,
>>> +    HVMIO_pio_completion
>>> +};
>>> +
>>> +struct hvm_vcpu_io {
>>> +    /* I/O request in flight to device model. */
>>> +    enum hvm_io_completion io_completion;
>>> +    ioreq_t                io_req;
>>> +
>>> +    /*
>>> +     * HVM emulation:
>>> +     *  Linear address @mmio_gla maps to MMIO physical frame 
>>> @mmio_gpfn.
>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>> +     *  This translation is only valid for accesses as per 
>>> @mmio_access.
>>> +     */
>>> +    struct npfec        mmio_access;
>>> +    unsigned long       mmio_gla;
>>> +    unsigned long       mmio_gpfn;
>>> +};
>>> +
>>
>> Why do we need to re-define most of this? Can't this just be in common 
>> code?
> 
> Jan asked almost the same question in "[PATCH V1 02/16] xen/ioreq: 
> Make x86's IOREQ feature common".
> Please see my answer there:
> https://patchwork.kernel.org/patch/11769105/#23637511
> 
> Theoretically we could move this to the common code, but the question is
> what to do with the other struct fields that x86's struct hvm_vcpu_io
> has/needs but Arm's seemingly does not. Would it be possible to logically
> split struct hvm_vcpu_io into common and arch parts?

Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?

Jan
Oleksandr Tyshchenko Sept. 24, 2020, 4:02 p.m. UTC | #7
On 24.09.20 14:08, Jan Beulich wrote:

Hi Jan

> On 23.09.2020 22:16, Oleksandr wrote:
>> On 23.09.20 21:03, Julien Grall wrote:
>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>    #endif
>>>>    }  __cacheline_aligned;
>>>>    +enum hvm_io_completion {
>>>> +    HVMIO_no_completion,
>>>> +    HVMIO_mmio_completion,
>>>> +    HVMIO_pio_completion
>>>> +};
>>>> +
>>>> +struct hvm_vcpu_io {
>>>> +    /* I/O request in flight to device model. */
>>>> +    enum hvm_io_completion io_completion;
>>>> +    ioreq_t                io_req;
>>>> +
>>>> +    /*
>>>> +     * HVM emulation:
>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame
>>>> @mmio_gpfn.
>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>> +     *  This translation is only valid for accesses as per
>>>> @mmio_access.
>>>> +     */
>>>> +    struct npfec        mmio_access;
>>>> +    unsigned long       mmio_gla;
>>>> +    unsigned long       mmio_gpfn;
>>>> +};
>>>> +
>>> Why do we need to re-define most of this? Can't this just be in common
>>> code?
>> Jan asked almost the same question in "[PATCH V1 02/16] xen/ioreq:
>> Make x86's IOREQ feature common".
>> Please see my answer there:
>> https://patchwork.kernel.org/patch/11769105/#23637511
>>
>> Theoretically we could move this to the common code, but the question is
>> what to do with the other struct fields that x86's struct hvm_vcpu_io
>> has/needs but Arm's seemingly does not. Would it be possible to logically
>> split struct hvm_vcpu_io into common and arch parts?
> Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?
Although it is going to pull a lot of changes into x86/hvm/*, yes, this
way we could indeed logically split struct hvm_vcpu_io into common and
arch parts in a clean way.
If it is really worth it, I will start looking into it.
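
Just to check I understand the suggestion correctly, a rough sketch of
the split (field placement is tentative, names are illustrative only):

struct arch_vcpu_io {
    /*
     * Arch-specific emulation state: empty on Arm for now, while x86
     * would keep mmio_*, msix_*, g2m_ioport, etc. here.
     */
};

struct vcpu_io {
    /* I/O request in flight to device model. */
    enum hvm_io_completion io_completion;
    ioreq_t                io_req;

    struct arch_vcpu_io    arch;
};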
Julien Grall Sept. 24, 2020, 4:51 p.m. UTC | #8
On 24/09/2020 12:08, Jan Beulich wrote:
> On 23.09.2020 22:16, Oleksandr wrote:
>> On 23.09.20 21:03, Julien Grall wrote:
>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>    #endif
>>>>    }  __cacheline_aligned;
>>>>    +enum hvm_io_completion {
>>>> +    HVMIO_no_completion,
>>>> +    HVMIO_mmio_completion,
>>>> +    HVMIO_pio_completion
>>>> +};
>>>> +
>>>> +struct hvm_vcpu_io {
>>>> +    /* I/O request in flight to device model. */
>>>> +    enum hvm_io_completion io_completion;
>>>> +    ioreq_t                io_req;
>>>> +
>>>> +    /*
>>>> +     * HVM emulation:
>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame
>>>> @mmio_gpfn.
>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>> +     *  This translation is only valid for accesses as per
>>>> @mmio_access.
>>>> +     */
>>>> +    struct npfec        mmio_access;
>>>> +    unsigned long       mmio_gla;
>>>> +    unsigned long       mmio_gpfn;
>>>> +};
>>>> +
>>>
>>> Why do we need to re-define most of this? Can't this just be in common
>>> code?
>>
>> Jan asked almost the same question in "[PATCH V1 02/16] xen/ioreq:
>> Make x86's IOREQ feature common".
>> Please see my answer there:
>> https://patchwork.kernel.org/patch/11769105/#23637511
>>
>> Theoretically we could move this to the common code, but the question is
>> how to be with other struct fields the x86's struct hvm_vcpu_io
>> has/needs but
>> Arm's seems not, would it be possible to logically split struct
>> hvm_vcpu_io into common and arch parts?
> 
> Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?

+1 for the idea.

Cheers,
Julien Grall Sept. 24, 2020, 5:25 p.m. UTC | #9
On 23/09/2020 21:16, Oleksandr wrote:
> 
> On 23.09.20 21:03, Julien Grall wrote:
>> Hi,
> 
> Hi Julien
> 
> 
>>
>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>
>> I believe I am the originally author of this code...
> 
> Sorry, will fix
> 
> 
>>
>>> This patch adds basic IOREQ/DM support on Arm. The subsequent
>>> patches will improve functionality, add remaining bits as well as
>>> address several TODOs.
>>
>> I find it a bit weird to add code with TODOs that are handled in the same
>> series. Can't we just split this patch into smaller ones where everything
>> is addressed from the start?
> Sorry if I wasn't clear in the description. Let me please clarify.
> The corresponding RFC patch had 3 major TODOs:
> 1. Handle properly when hvm_send_ioreq() returns IO_RETRY
> 2. Proper ref-counting for the foreign entries in set_foreign_p2m_entry()
> 3. Check the return value of handle_hvm_io_completion() *and* avoid 
> calling handle_hvm_io_completion() on every return.
> 
> TODO #1 was fixed in current patch
> TODO #2 was fixed in "xen/mm: Handle properly reference in 
> set_foreign_p2m_entry() on Arm"
> TODO #3 was partially fixed in current patch (check the return value of 
> handle_hvm_io_completion()).
> The second part of TODO #3 (avoid calling handle_hvm_io_completion() on 
> every return) was moved to a separate patch "xen/ioreq: Introduce 
> hvm_domain_has_ioreq_server()"
> and fixed (or probably improved is a better word) there along with 
> introducing a mechanism to actually improve.

Right, none of those TODOs are described in the code, so it is more 
difficult to know what you are actually referring to.

I would suggest reshuffling the series so that the TODOs are addressed 
earlier where possible.

> 
> Could you please clarify how this patch could be split in smaller one?

This patch is going to shrink a fair bit if you make some of the 
structures common. The next step would be to move anything that is not 
directly related to IOREQ out.

From a quick look, there are a few things that can be moved into separate 
patches:
    - The addition of the ASSERT_UNREACHABLE()
    - The addition of the loop in leave_hypervisor_to_guest(), as I think 
it deserves some explanation.
    - The sign extension in handle_ioserv() can possibly be abstracted; 
the code is actually quite similar to handle_read(). A sketch of such a 
helper is below.
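
For example (a sketch only; the helper name is hypothetical, and the 
size guard for full-width accesses is my addition, not in the patch):

static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r)
{
    uint8_t size = (1 << dabt.size) * 8;

    /*
     * Sign extend if required. The read handler is expected to have
     * zeroed the bits outside the requested access size. The extra
     * "size < ..." check avoids an undefined shift for full-width
     * accesses.
     */
    if ( dabt.sign && (size < sizeof(register_t) * 8) &&
         (r & (1UL << (size - 1))) )
    {
        /*
         * We rely on register_t being the same size as unsigned long
         * in order to keep the 32-bit assembly code smaller.
         */
        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
        r |= (~0UL) << size;
    }

    return r;
}

Both handle_ioserv() and handle_read() could then call this instead of 
open-coding the logic.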

> 
> 
>>
>>
>>>
>>> Please note, the "PIO handling" TODO is expected to left unaddressed
>>> for the current series. It is not an big issue for now while Xen
>>> doesn't have support for vPCI on Arm. On Arm64 they are only used
>>> for PCI IO Bar and we would probably want to expose them to emulator
>>> as PIO access to make a DM completely arch-agnostic. So "PIO handling"
>>> should be implemented when we add support for vPCI.
>>>
>>> Please note, at the moment build on Arm32 is broken (see cmpxchg
>>> usage in hvm_send_buffered_ioreq()) due to the lack of cmpxchg_64
>>> support on Arm32. There is a patch on review to address this issue:
>>> https://patchwork.kernel.org/patch/11715559/
>>
>> This has been committed.
> 
> Thank you for the patch, will remove a notice.

For future reference, I think such notices would be better placed after 
the --- marker, as they don't need to be part of the commit message.

> 
>>
>>
>>> +    if ( dabt.write )
>>> +        return IO_HANDLED;
>>> +
>>> +    /*
>>> +     * Sign extend if required.
>>> +     * Note that we expect the read handler to have zeroed the bits
>>> +     * outside the requested access size.
>>> +     */
>>> +    if ( dabt.sign && (r & (1UL << (size - 1))) )
>>> +    {
>>> +        /*
>>> +         * We are relying on register_t using the same as
>>> +         * an unsigned long in order to keep the 32-bit assembly
>>> +         * code smaller.
>>> +         */
>>> +        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
>>> +        r |= (~0UL) << size;
>>> +    }
>>> +
>>> +    set_user_reg(regs, dabt.reg, r);
>>> +
>>> +    return IO_HANDLED;
>>> +}
>>> +
>>> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
>>> +                             struct vcpu *v, mmio_info_t *info)
>>> +{
>>> +    struct hvm_vcpu_io *vio = &v->arch.hvm.hvm_io;
>>> +    ioreq_t p = {
>>> +        .type = IOREQ_TYPE_COPY,
>>> +        .addr = info->gpa,
>>> +        .size = 1 << info->dabt.size,
>>> +        .count = 1,
>>> +        .dir = !info->dabt.write,
>>> +        /*
>>> +         * On x86, df is used by 'rep' instruction to tell the 
>>> direction
>>> +         * to iterate (forward or backward).
>>> +         * On Arm, all the accesses to MMIO region will do a single
>>> +         * memory access. So for now, we can safely always set to 0.
>>> +         */
>>> +        .df = 0,
>>> +        .data = get_user_reg(regs, info->dabt.reg),
>>> +        .state = STATE_IOREQ_READY,
>>> +    };
>>> +    struct hvm_ioreq_server *s = NULL;
>>> +    enum io_state rc;
>>> +
>>> +    switch ( vio->io_req.state )
>>> +    {
>>> +    case STATE_IOREQ_NONE:
>>> +        break;
>>> +
>>> +    case STATE_IORESP_READY:
>>> +        return IO_HANDLED;
>>> +
>>> +    default:
>>> +        gdprintk(XENLOG_ERR, "wrong state %u\n", vio->io_req.state);
>>> +        return IO_ABORT;
>>> +    }
>>> +
>>> +    s = hvm_select_ioreq_server(v->domain, &p);
>>> +    if ( !s )
>>> +        return IO_UNHANDLED;
>>> +
>>> +    if ( !info->dabt.valid )
>>> +        return IO_ABORT;
>>> +
>>> +    vio->io_req = p;
>>> +
>>> +    rc = hvm_send_ioreq(s, &p, 0);
>>> +    if ( rc != IO_RETRY || v->domain->is_shutting_down )
>>> +        vio->io_req.state = STATE_IOREQ_NONE;
>>> +    else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
>>> +        rc = IO_HANDLED;
>>> +    else
>>> +        vio->io_completion = HVMIO_mmio_completion;
>>> +
>>> +    return rc;
>>> +}
>>> +
>>> +bool ioreq_handle_complete_mmio(void)
>>> +{
>>> +    struct vcpu *v = current;
>>> +    struct cpu_user_regs *regs = guest_cpu_user_regs();
>>> +    const union hsr hsr = { .bits = regs->hsr };
>>> +    paddr_t addr = v->arch.hvm.hvm_io.io_req.addr;
>>> +
>>> +    if ( try_handle_mmio(regs, hsr, addr) == IO_HANDLED )
>>> +    {
>>> +        advance_pc(regs, hsr);
>>> +        return true;
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +/*
>>> + * Local variables:
>>> + * mode: C
>>> + * c-file-style: "BSD"
>>> + * c-basic-offset: 4
>>> + * tab-width: 4
>>> + * indent-tabs-mode: nil
>>> + * End:
>>> + */
>>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>>> index 8f40d0e..121942c 100644
>>> --- a/xen/arch/arm/traps.c
>>> +++ b/xen/arch/arm/traps.c
>>> @@ -21,6 +21,7 @@
>>>   #include <xen/hypercall.h>
>>>   #include <xen/init.h>
>>>   #include <xen/iocap.h>
>>> +#include <xen/ioreq.h>
>>>   #include <xen/irq.h>
>>>   #include <xen/lib.h>
>>>   #include <xen/mem_access.h>
>>> @@ -1384,6 +1385,9 @@ static arm_hypercall_t arm_hypercall_table[] = {
>>>   #ifdef CONFIG_HYPFS
>>>       HYPERCALL(hypfs_op, 5),
>>>   #endif
>>> +#ifdef CONFIG_IOREQ_SERVER
>>> +    HYPERCALL(dm_op, 3),
>>> +#endif
>>>   };
>>>     #ifndef NDEBUG
>>> @@ -1955,9 +1959,14 @@ static void do_trap_stage2_abort_guest(struct 
>>> cpu_user_regs *regs,
>>>               case IO_HANDLED:
>>>                   advance_pc(regs, hsr);
>>>                   return;
>>> +            case IO_RETRY:
>>> +                /* finish later */
>>> +                return;
>>>               case IO_UNHANDLED:
>>>                   /* IO unhandled, try another way to handle it. */
>>>                   break;
>>> +            default:
>>> +                ASSERT_UNREACHABLE();
>>>               }
>>>           }
>>>   @@ -2249,12 +2258,23 @@ static void check_for_pcpu_work(void)
>>>    * Process pending work for the vCPU. Any call should be fast or
>>>    * implement preemption.
>>>    */
>>> -static void check_for_vcpu_work(void)
>>> +static bool check_for_vcpu_work(void)
>>>   {
>>>       struct vcpu *v = current;
>>>   +#ifdef CONFIG_IOREQ_SERVER
>>> +    bool handled;
>>> +
>>> +    local_irq_enable();
>>> +    handled = handle_hvm_io_completion(v);
>>> +    local_irq_disable();
>>> +
>>> +    if ( !handled )
>>> +        return true;
>>> +#endif
>>> +
>>>       if ( likely(!v->arch.need_flush_to_ram) )
>>> -        return;
>>> +        return false;
>>>         /*
>>>        * Give a chance for the pCPU to process work before handling 
>>> the vCPU
>>> @@ -2265,6 +2285,8 @@ static void check_for_vcpu_work(void)
>>>       local_irq_enable();
>>>       p2m_flush_vm(v);
>>>       local_irq_disable();
>>> +
>>> +    return false;
>>>   }
>>>     /*
>>> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>>>   {
>>>       local_irq_disable();
>>>   -    check_for_vcpu_work();
>>> -    check_for_pcpu_work();
>>> +    do
>>> +    {
>>> +        check_for_pcpu_work();
>>> +    } while ( check_for_vcpu_work() );
>>>         vgic_sync_to_lrs();
>>>   diff --git a/xen/include/asm-arm/domain.h 
>>> b/xen/include/asm-arm/domain.h
>>> index 6819a3b..d1c48d7 100644
>>> --- a/xen/include/asm-arm/domain.h
>>> +++ b/xen/include/asm-arm/domain.h
>>> @@ -11,10 +11,27 @@
>>>   #include <asm/vgic.h>
>>>   #include <asm/vpl011.h>
>>>   #include <public/hvm/params.h>
>>> +#include <public/hvm/dm_op.h>
>>> +#include <public/hvm/ioreq.h>
>>> +
>>> +#define MAX_NR_IOREQ_SERVERS 8
>>>     struct hvm_domain
>>>   {
>>>       uint64_t              params[HVM_NR_PARAMS];
>>> +
>>> +    /* Guest page range used for non-default ioreq servers */
>>> +    struct {
>>> +        unsigned long base;
>>> +        unsigned long mask;
>>> +        unsigned long legacy_mask; /* indexed by HVM param number */
>>> +    } ioreq_gfn;
>>> +
>>> +    /* Lock protects all other values in the sub-struct and the 
>>> default */
>>> +    struct {
>>> +        spinlock_t              lock;
>>> +        struct hvm_ioreq_server *server[MAX_NR_IOREQ_SERVERS];
>>> +    } ioreq_server;
>>>   };
>>>     #ifdef CONFIG_ARM_64
>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>   #endif
>>>   }  __cacheline_aligned;
>>>   +enum hvm_io_completion {
>>> +    HVMIO_no_completion,
>>> +    HVMIO_mmio_completion,
>>> +    HVMIO_pio_completion
>>> +};
>>> +
>>> +struct hvm_vcpu_io {
>>> +    /* I/O request in flight to device model. */
>>> +    enum hvm_io_completion io_completion;
>>> +    ioreq_t                io_req;
>>> +
>>> +    /*
>>> +     * HVM emulation:
>>> +     *  Linear address @mmio_gla maps to MMIO physical frame 
>>> @mmio_gpfn.
>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>> +     *  This translation is only valid for accesses as per 
>>> @mmio_access.
>>> +     */
>>> +    struct npfec        mmio_access;
>>> +    unsigned long       mmio_gla;
>>> +    unsigned long       mmio_gpfn;
>>> +};
>>> +
>>
>> Why do we need to re-define most of this? Can't this just be in common 
>> code?
> 
> Jan asked almost the similar question in "[PATCH V1 02/16] xen/ioreq: 
> Make x86's IOREQ feature common".
> Please see my answer there:
> https://patchwork.kernel.org/patch/11769105/#23637511
> 
> Theoretically we could move this to the common code, but the question is 
> how to be with other struct fields the x86's struct hvm_vcpu_io 
> has/needs but
> Arm's seems not, would it be possible to logically split struct 
> hvm_vcpu_io into common and arch parts?
> 
> struct hvm_vcpu_io {
>      /* I/O request in flight to device model. */
>      enum hvm_io_completion io_completion;
>      ioreq_t                io_req;
> 
>      /*
>       * HVM emulation:
>       *  Linear address @mmio_gla maps to MMIO physical frame @mmio_gpfn.
>       *  The latter is known to be an MMIO frame (not RAM).
>       *  This translation is only valid for accesses as per @mmio_access.
>       */
>      struct npfec        mmio_access;
>      unsigned long       mmio_gla;
>      unsigned long       mmio_gpfn;
> 
>      /*
>       * We may need to handle up to 3 distinct memory accesses per
>       * instruction.
>       */
>      struct hvm_mmio_cache mmio_cache[3];
>      unsigned int mmio_cache_count;
> 
>      /* For retries we shouldn't re-fetch the instruction. */
>      unsigned int mmio_insn_bytes;
>      unsigned char mmio_insn[16];
>      struct hvmemul_cache *cache;
> 
>      /*
>       * For string instruction emulation we need to be able to signal a
>       * necessary retry through other than function return codes.
>       */
>      bool_t mmio_retry;
> 
>      unsigned long msix_unmask_address;
>      unsigned long msix_snoop_address;
>      unsigned long msix_snoop_gpa;
> 
>      const struct g2m_ioport *g2m_ioport;
> };

I think Jan made a suggestion today. Let me know if you require more 
input.

Cheers,
Oleksandr Tyshchenko Sept. 24, 2020, 6:02 p.m. UTC | #10
On 24.09.20 19:02, Oleksandr wrote:

Hi

>
> On 24.09.20 14:08, Jan Beulich wrote:
>
> Hi Jan
>
>> On 23.09.2020 22:16, Oleksandr wrote:
>>> On 23.09.20 21:03, Julien Grall wrote:
>>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>>    #endif
>>>>>    }  __cacheline_aligned;
>>>>>    +enum hvm_io_completion {
>>>>> +    HVMIO_no_completion,
>>>>> +    HVMIO_mmio_completion,
>>>>> +    HVMIO_pio_completion
>>>>> +};
>>>>> +
>>>>> +struct hvm_vcpu_io {
>>>>> +    /* I/O request in flight to device model. */
>>>>> +    enum hvm_io_completion io_completion;
>>>>> +    ioreq_t                io_req;
>>>>> +
>>>>> +    /*
>>>>> +     * HVM emulation:
>>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame
>>>>> @mmio_gpfn.
>>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>>> +     *  This translation is only valid for accesses as per
>>>>> @mmio_access.
>>>>> +     */
>>>>> +    struct npfec        mmio_access;
>>>>> +    unsigned long       mmio_gla;
>>>>> +    unsigned long       mmio_gpfn;
>>>>> +};
>>>>> +
>>>> Why do we need to re-define most of this? Can't this just be in common
>>>> code?
>>> Jan asked almost the similar question in "[PATCH V1 02/16] xen/ioreq:
>>> Make x86's IOREQ feature common".
>>> Please see my answer there:
>>> https://patchwork.kernel.org/patch/11769105/#23637511
>>>
>>> Theoretically we could move this to the common code, but the 
>>> question is
>>> how to be with other struct fields the x86's struct hvm_vcpu_io
>>> has/needs but
>>> Arm's seems not, would it be possible to logically split struct
>>> hvm_vcpu_io into common and arch parts?
>> Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?
> Although it is going to pull a lot of changes into x86/hvm/*, yes this 
> way
> we indeed could logically split struct hvm_vcpu_io into common and 
> arch parts in a clear way.
> If it is really worth it, I will start looking into it.
Julien, I noticed that the three mmio* fields are not used within the 
current series on Arm. Do we expect them to be used later on?
I would rather not add fields which are not used; we could add them when 
needed.

Would the following be acceptable?
1. Both fields "io_completion" and "io_req" (which seem to be the only 
fields used in common/ioreq.c) are moved to the common struct vcpu as 
part of a struct vcpu_io,
     and enum hvm_io_completion is also moved (and renamed).
2. We remove everything related to hvm_vcpu* from the Arm header.
3. x86's struct hvm_vcpu_io stays as it is (but without the two fields 
"io_completion" and "io_req").
     I think this way we separate out a common part and reduce the Xen 
changes (which are getting bigger). A rough sketch follows below.
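
A rough sketch of the split (the names below are illustrative only, not 
final):

/* Common part, would become a sub-struct of the common struct vcpu. */
enum vio_completion {
    VIO_no_completion,
    VIO_mmio_completion,
    VIO_pio_completion,
};

struct vcpu_io {
    /* I/O request in flight to device model. */
    enum vio_completion completion;
    ioreq_t             req;
};

x86's struct hvm_vcpu_io would keep only its remaining arch-specific 
fields (mmio_*, the emulation caches, msix_*, g2m_ioport, ...).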
Oleksandr Tyshchenko Sept. 24, 2020, 6:22 p.m. UTC | #11
On 24.09.20 20:25, Julien Grall wrote:

Hi Julien.

>
>
> On 23/09/2020 21:16, Oleksandr wrote:
>>
>> On 23.09.20 21:03, Julien Grall wrote:
>>> Hi,
>>
>> Hi Julien
>>
>>
>>>
>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>
>>> I believe I am the originally author of this code...
>>
>> Sorry, will fix
>>
>>
>>>
>>>> This patch adds basic IOREQ/DM support on Arm. The subsequent
>>>> patches will improve functionality, add remaining bits as well as
>>>> address several TODOs.
>>>
>>> Find a bit weird to add code with TODOs that are handled in the same 
>>> series? Can't we just split this patch in smaller one where 
>>> everything is addressed from start?
>> Sorry if I wasn't clear in description. Let me please clarify.
>> Corresponding RFC patch had 3 major TODOs:
>> 1. Handle properly when hvm_send_ioreq() returns IO_RETRY
>> 2. Proper ref-counting for the foreign entries in 
>> set_foreign_p2m_entry()
>> 3. Check the return value of handle_hvm_io_completion() *and* avoid 
>> calling handle_hvm_io_completion() on every return.
>>
>> TODO #1 was fixed in current patch
>> TODO #2 was fixed in "xen/mm: Handle properly reference in 
>> set_foreign_p2m_entry() on Arm"
>> TODO #3 was partially fixed in current patch (check the return value 
>> of handle_hvm_io_completion()).
>> The second part of TODO #3 (avoid calling handle_hvm_io_completion() 
>> on every return) was moved to a separate patch "xen/ioreq: Introduce 
>> hvm_domain_has_ioreq_server()"
>> and fixed (or probably improved is a better word) there along with 
>> introducing a mechanism to actually improve.
>
> Right, none of those TODOs are described in the code. So it makes more 
> difficult to know what you are actually referring to.
>
> I would suggest to reshuffle the series so the TODOs are addressed 
> before when possible.

OK, I will try to prepare something.


>
>>
>> Could you please clarify how this patch could be split in smaller one?
>
> This patch is going to be reduced a fair bit if you make some of the 
> structure common. The next steps would be to move anything that is not 
> directly related to IOREQ out.


Thank you for the clarification.
Yes; however, I believed everything in this patch was directly related to 
IOREQ...


>
>
> From a quick look, there are few things that can be moved in separate 
> patches:
>    - The addition of the ASSERT_UNREACHABLE()

Did you mean the addition of the ASSERT_UNREACHABLE() to 
arch_handle_hvm_io_completion/handle_pio can be moved to separate patches?
Sorry, I don't quite understand: for what benefit?


>    - The addition of the loop in leave_hypervisor_to_guest() as I 
> think it deserve some explanation.

Agreed that the loop in leave_hypervisor_to_guest() needs an explanation; 
I will move it to a separate patch. But this way I need to return the 
corresponding TODO to this patch. A first stab at the wording is below.
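
For reference, a first attempt at the explanation (my current 
understanding, to be confirmed in that separate patch):

/*
 * handle_hvm_io_completion() may fail to complete the I/O (e.g. the
 * device model has not responded yet), and while waiting new softirqs
 * may have been raised. So keep processing the pCPU work and
 * re-checking the vCPU work until nothing is left before returning
 * to the guest.
 */
do
{
    check_for_pcpu_work();
} while ( check_for_vcpu_work() );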


>    - The sign extension in handle_ioserv() can possibly be abstracted. 
> Actually the code is quite similar to handle_read().

OK, will consider that.


>
>>
>>>
>>>>
>>>> Please note, the "PIO handling" TODO is expected to left unaddressed
>>>> for the current series. It is not an big issue for now while Xen
>>>> doesn't have support for vPCI on Arm. On Arm64 they are only used
>>>> for PCI IO Bar and we would probably want to expose them to emulator
>>>> as PIO access to make a DM completely arch-agnostic. So "PIO handling"
>>>> should be implemented when we add support for vPCI.
>>>>
>>>> Please note, at the moment build on Arm32 is broken (see cmpxchg
>>>> usage in hvm_send_buffered_ioreq()) due to the lack of cmpxchg_64
>>>> support on Arm32. There is a patch on review to address this issue:
>>>> https://patchwork.kernel.org/patch/11715559/
>>>
>>> This has been committed.
>>
>> Thank you for the patch, will remove a notice.
>
> For future reference, I think such notice would be better after --- as 
> they don't need to be part of the commit message.

Got it.


>
>
>>
>>>
>>>
>>>> +    if ( dabt.write )
>>>> +        return IO_HANDLED;
>>>> +
>>>> +    /*
>>>> +     * Sign extend if required.
>>>> +     * Note that we expect the read handler to have zeroed the bits
>>>> +     * outside the requested access size.
>>>> +     */
>>>> +    if ( dabt.sign && (r & (1UL << (size - 1))) )
>>>> +    {
>>>> +        /*
>>>> +         * We are relying on register_t using the same as
>>>> +         * an unsigned long in order to keep the 32-bit assembly
>>>> +         * code smaller.
>>>> +         */
>>>> +        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
>>>> +        r |= (~0UL) << size;
>>>> +    }
>>>> +
>>>> +    set_user_reg(regs, dabt.reg, r);
>>>> +
>>>> +    return IO_HANDLED;
>>>> +}
>>>> +
>>>> +enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
>>>> +                             struct vcpu *v, mmio_info_t *info)
>>>> +{
>>>> +    struct hvm_vcpu_io *vio = &v->arch.hvm.hvm_io;
>>>> +    ioreq_t p = {
>>>> +        .type = IOREQ_TYPE_COPY,
>>>> +        .addr = info->gpa,
>>>> +        .size = 1 << info->dabt.size,
>>>> +        .count = 1,
>>>> +        .dir = !info->dabt.write,
>>>> +        /*
>>>> +         * On x86, df is used by 'rep' instruction to tell the 
>>>> direction
>>>> +         * to iterate (forward or backward).
>>>> +         * On Arm, all the accesses to MMIO region will do a single
>>>> +         * memory access. So for now, we can safely always set to 0.
>>>> +         */
>>>> +        .df = 0,
>>>> +        .data = get_user_reg(regs, info->dabt.reg),
>>>> +        .state = STATE_IOREQ_READY,
>>>> +    };
>>>> +    struct hvm_ioreq_server *s = NULL;
>>>> +    enum io_state rc;
>>>> +
>>>> +    switch ( vio->io_req.state )
>>>> +    {
>>>> +    case STATE_IOREQ_NONE:
>>>> +        break;
>>>> +
>>>> +    case STATE_IORESP_READY:
>>>> +        return IO_HANDLED;
>>>> +
>>>> +    default:
>>>> +        gdprintk(XENLOG_ERR, "wrong state %u\n", vio->io_req.state);
>>>> +        return IO_ABORT;
>>>> +    }
>>>> +
>>>> +    s = hvm_select_ioreq_server(v->domain, &p);
>>>> +    if ( !s )
>>>> +        return IO_UNHANDLED;
>>>> +
>>>> +    if ( !info->dabt.valid )
>>>> +        return IO_ABORT;
>>>> +
>>>> +    vio->io_req = p;
>>>> +
>>>> +    rc = hvm_send_ioreq(s, &p, 0);
>>>> +    if ( rc != IO_RETRY || v->domain->is_shutting_down )
>>>> +        vio->io_req.state = STATE_IOREQ_NONE;
>>>> +    else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
>>>> +        rc = IO_HANDLED;
>>>> +    else
>>>> +        vio->io_completion = HVMIO_mmio_completion;
>>>> +
>>>> +    return rc;
>>>> +}
>>>> +
>>>> +bool ioreq_handle_complete_mmio(void)
>>>> +{
>>>> +    struct vcpu *v = current;
>>>> +    struct cpu_user_regs *regs = guest_cpu_user_regs();
>>>> +    const union hsr hsr = { .bits = regs->hsr };
>>>> +    paddr_t addr = v->arch.hvm.hvm_io.io_req.addr;
>>>> +
>>>> +    if ( try_handle_mmio(regs, hsr, addr) == IO_HANDLED )
>>>> +    {
>>>> +        advance_pc(regs, hsr);
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    return false;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Local variables:
>>>> + * mode: C
>>>> + * c-file-style: "BSD"
>>>> + * c-basic-offset: 4
>>>> + * tab-width: 4
>>>> + * indent-tabs-mode: nil
>>>> + * End:
>>>> + */
>>>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>>>> index 8f40d0e..121942c 100644
>>>> --- a/xen/arch/arm/traps.c
>>>> +++ b/xen/arch/arm/traps.c
>>>> @@ -21,6 +21,7 @@
>>>>   #include <xen/hypercall.h>
>>>>   #include <xen/init.h>
>>>>   #include <xen/iocap.h>
>>>> +#include <xen/ioreq.h>
>>>>   #include <xen/irq.h>
>>>>   #include <xen/lib.h>
>>>>   #include <xen/mem_access.h>
>>>> @@ -1384,6 +1385,9 @@ static arm_hypercall_t arm_hypercall_table[] = {
>>>>   #ifdef CONFIG_HYPFS
>>>>       HYPERCALL(hypfs_op, 5),
>>>>   #endif
>>>> +#ifdef CONFIG_IOREQ_SERVER
>>>> +    HYPERCALL(dm_op, 3),
>>>> +#endif
>>>>   };
>>>>     #ifndef NDEBUG
>>>> @@ -1955,9 +1959,14 @@ static void 
>>>> do_trap_stage2_abort_guest(struct cpu_user_regs *regs,
>>>>               case IO_HANDLED:
>>>>                   advance_pc(regs, hsr);
>>>>                   return;
>>>> +            case IO_RETRY:
>>>> +                /* finish later */
>>>> +                return;
>>>>               case IO_UNHANDLED:
>>>>                   /* IO unhandled, try another way to handle it. */
>>>>                   break;
>>>> +            default:
>>>> +                ASSERT_UNREACHABLE();
>>>>               }
>>>>           }
>>>>   @@ -2249,12 +2258,23 @@ static void check_for_pcpu_work(void)
>>>>    * Process pending work for the vCPU. Any call should be fast or
>>>>    * implement preemption.
>>>>    */
>>>> -static void check_for_vcpu_work(void)
>>>> +static bool check_for_vcpu_work(void)
>>>>   {
>>>>       struct vcpu *v = current;
>>>>   +#ifdef CONFIG_IOREQ_SERVER
>>>> +    bool handled;
>>>> +
>>>> +    local_irq_enable();
>>>> +    handled = handle_hvm_io_completion(v);
>>>> +    local_irq_disable();
>>>> +
>>>> +    if ( !handled )
>>>> +        return true;
>>>> +#endif
>>>> +
>>>>       if ( likely(!v->arch.need_flush_to_ram) )
>>>> -        return;
>>>> +        return false;
>>>>         /*
>>>>        * Give a chance for the pCPU to process work before handling 
>>>> the vCPU
>>>> @@ -2265,6 +2285,8 @@ static void check_for_vcpu_work(void)
>>>>       local_irq_enable();
>>>>       p2m_flush_vm(v);
>>>>       local_irq_disable();
>>>> +
>>>> +    return false;
>>>>   }
>>>>     /*
>>>> @@ -2277,8 +2299,10 @@ void leave_hypervisor_to_guest(void)
>>>>   {
>>>>       local_irq_disable();
>>>>   -    check_for_vcpu_work();
>>>> -    check_for_pcpu_work();
>>>> +    do
>>>> +    {
>>>> +        check_for_pcpu_work();
>>>> +    } while ( check_for_vcpu_work() );
>>>>         vgic_sync_to_lrs();
>>>>   diff --git a/xen/include/asm-arm/domain.h 
>>>> b/xen/include/asm-arm/domain.h
>>>> index 6819a3b..d1c48d7 100644
>>>> --- a/xen/include/asm-arm/domain.h
>>>> +++ b/xen/include/asm-arm/domain.h
>>>> @@ -11,10 +11,27 @@
>>>>   #include <asm/vgic.h>
>>>>   #include <asm/vpl011.h>
>>>>   #include <public/hvm/params.h>
>>>> +#include <public/hvm/dm_op.h>
>>>> +#include <public/hvm/ioreq.h>
>>>> +
>>>> +#define MAX_NR_IOREQ_SERVERS 8
>>>>     struct hvm_domain
>>>>   {
>>>>       uint64_t              params[HVM_NR_PARAMS];
>>>> +
>>>> +    /* Guest page range used for non-default ioreq servers */
>>>> +    struct {
>>>> +        unsigned long base;
>>>> +        unsigned long mask;
>>>> +        unsigned long legacy_mask; /* indexed by HVM param number */
>>>> +    } ioreq_gfn;
>>>> +
>>>> +    /* Lock protects all other values in the sub-struct and the 
>>>> default */
>>>> +    struct {
>>>> +        spinlock_t              lock;
>>>> +        struct hvm_ioreq_server *server[MAX_NR_IOREQ_SERVERS];
>>>> +    } ioreq_server;
>>>>   };
>>>>     #ifdef CONFIG_ARM_64
>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>   #endif
>>>>   }  __cacheline_aligned;
>>>>   +enum hvm_io_completion {
>>>> +    HVMIO_no_completion,
>>>> +    HVMIO_mmio_completion,
>>>> +    HVMIO_pio_completion
>>>> +};
>>>> +
>>>> +struct hvm_vcpu_io {
>>>> +    /* I/O request in flight to device model. */
>>>> +    enum hvm_io_completion io_completion;
>>>> +    ioreq_t                io_req;
>>>> +
>>>> +    /*
>>>> +     * HVM emulation:
>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame 
>>>> @mmio_gpfn.
>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>> +     *  This translation is only valid for accesses as per 
>>>> @mmio_access.
>>>> +     */
>>>> +    struct npfec        mmio_access;
>>>> +    unsigned long       mmio_gla;
>>>> +    unsigned long       mmio_gpfn;
>>>> +};
>>>> +
>>>
>>> Why do we need to re-define most of this? Can't this just be in 
>>> common code?
>>
>> Jan asked almost the similar question in "[PATCH V1 02/16] xen/ioreq: 
>> Make x86's IOREQ feature common".
>> Please see my answer there:
>> https://patchwork.kernel.org/patch/11769105/#23637511
>>
>> Theoretically we could move this to the common code, but the question 
>> is how to be with other struct fields the x86's struct hvm_vcpu_io 
>> has/needs but
>> Arm's seems not, would it be possible to logically split struct 
>> hvm_vcpu_io into common and arch parts?
>>
>> struct hvm_vcpu_io {
>>      /* I/O request in flight to device model. */
>>      enum hvm_io_completion io_completion;
>>      ioreq_t                io_req;
>>
>>      /*
>>       * HVM emulation:
>>       *  Linear address @mmio_gla maps to MMIO physical frame 
>> @mmio_gpfn.
>>       *  The latter is known to be an MMIO frame (not RAM).
>>       *  This translation is only valid for accesses as per 
>> @mmio_access.
>>       */
>>      struct npfec        mmio_access;
>>      unsigned long       mmio_gla;
>>      unsigned long       mmio_gpfn;
>>
>>      /*
>>       * We may need to handle up to 3 distinct memory accesses per
>>       * instruction.
>>       */
>>      struct hvm_mmio_cache mmio_cache[3];
>>      unsigned int mmio_cache_count;
>>
>>      /* For retries we shouldn't re-fetch the instruction. */
>>      unsigned int mmio_insn_bytes;
>>      unsigned char mmio_insn[16];
>>      struct hvmemul_cache *cache;
>>
>>      /*
>>       * For string instruction emulation we need to be able to signal a
>>       * necessary retry through other than function return codes.
>>       */
>>      bool_t mmio_retry;
>>
>>      unsigned long msix_unmask_address;
>>      unsigned long msix_snoop_address;
>>      unsigned long msix_snoop_gpa;
>>
>>      const struct g2m_ioport *g2m_ioport;
>> };
>
> I think Jan made some suggestion today. Let me know if you require 
> more input.


Yes, I am considering this now. I provided my thoughts on that a little 
bit earlier; could you please comment there?
Jan Beulich Sept. 25, 2020, 6:51 a.m. UTC | #12
On 24.09.2020 20:02, Oleksandr wrote:
> On 24.09.20 19:02, Oleksandr wrote:
> Julien, I noticed that three fields mmio* are not used within current 
> series on Arm. Do we expect them to be used later on?
> I would rather not add fields which are not used. We could add them when 
> needed.
> 
> Would be the following acceptable?
> 1. Both fields "io_completion" and "io_req" (which seems to be the only 
> fields used in common/ioreq.c) are moved to common struct vcpu as part 
> of struct vcpu_io,
>      enum hvm_io_completion is also moved (and renamed).
> 2. We remove everything related to hvm_vcpu* from the Arm header.
> 3. x86's struct hvm_vcpu_io stays as it is (but without two fields 
> "io_completion" and "io_req").
>      I think, this way we separate a common part and reduce Xen changes 
> (which are getting bigger).

If this works, it would be my preference too. So far I was left
under the impression that you did move / mention further fields
for a reason.

Jan
Oleksandr Tyshchenko Sept. 25, 2020, 9:47 a.m. UTC | #13
On 25.09.20 09:51, Jan Beulich wrote:

Hi Jan

> On 24.09.2020 20:02, Oleksandr wrote:
>> On 24.09.20 19:02, Oleksandr wrote:
>> Julien, I noticed that three fields mmio* are not used within current
>> series on Arm. Do we expect them to be used later on?
>> I would rather not add fields which are not used. We could add them when
>> needed.
>>
>> Would be the following acceptable?
>> 1. Both fields "io_completion" and "io_req" (which seems to be the only
>> fields used in common/ioreq.c) are moved to common struct vcpu as part
>> of struct vcpu_io,
>>       enum hvm_io_completion is also moved (and renamed).
>> 2. We remove everything related to hvm_vcpu* from the Arm header.
>> 3. x86's struct hvm_vcpu_io stays as it is (but without two fields
>> "io_completion" and "io_req").
>>       I think, this way we separate a common part and reduce Xen changes
>> (which are getting bigger).
> If this works, it would be my preference too.

Thanks. I will wait for Julien's input on that, and if he doesn't have 
any objections I will go in this direction.
Julien Grall Sept. 26, 2020, 1:12 p.m. UTC | #14
Hi,

On 24/09/2020 19:02, Oleksandr wrote:
> On 24.09.20 19:02, Oleksandr wrote:
>> On 24.09.20 14:08, Jan Beulich wrote:
>>> On 23.09.2020 22:16, Oleksandr wrote:
>>>> On 23.09.20 21:03, Julien Grall wrote:
>>>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>>>    #endif
>>>>>>    }  __cacheline_aligned;
>>>>>>    +enum hvm_io_completion {
>>>>>> +    HVMIO_no_completion,
>>>>>> +    HVMIO_mmio_completion,
>>>>>> +    HVMIO_pio_completion
>>>>>> +};
>>>>>> +
>>>>>> +struct hvm_vcpu_io {
>>>>>> +    /* I/O request in flight to device model. */
>>>>>> +    enum hvm_io_completion io_completion;
>>>>>> +    ioreq_t                io_req;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * HVM emulation:
>>>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame
>>>>>> @mmio_gpfn.
>>>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>>>> +     *  This translation is only valid for accesses as per
>>>>>> @mmio_access.
>>>>>> +     */
>>>>>> +    struct npfec        mmio_access;
>>>>>> +    unsigned long       mmio_gla;
>>>>>> +    unsigned long       mmio_gpfn;
>>>>>> +};
>>>>>> +
>>>>> Why do we need to re-define most of this? Can't this just be in common
>>>>> code?
>>>> Jan asked almost the similar question in "[PATCH V1 02/16] xen/ioreq:
>>>> Make x86's IOREQ feature common".
>>>> Please see my answer there:
>>>> https://patchwork.kernel.org/patch/11769105/#23637511
>>>>
>>>> Theoretically we could move this to the common code, but the 
>>>> question is
>>>> how to be with other struct fields the x86's struct hvm_vcpu_io
>>>> has/needs but
>>>> Arm's seems not, would it be possible to logically split struct
>>>> hvm_vcpu_io into common and arch parts?
>>> Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?
>> Although it is going to pull a lot of changes into x86/hvm/*, yes this 
>> way
>> we indeed could logically split struct hvm_vcpu_io into common and 
>> arch parts in a clear way.
>> If it is really worth it, I will start looking into it.
> Julien, I noticed that three fields mmio* are not used within current 
> series on Arm. Do we expect them to be used later on?

IIRC, I just copied them blindly when writing the PoC.

The information can already be found using the HSR (syndrome register), 
so those fields would be redundant for us.
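
For instance (a rough sketch reusing the identifiers this patch already 
has; not a complete function):

const union hsr hsr = { .bits = regs->hsr };
const struct hsr_dabt dabt = hsr.dabt;

unsigned int size = 1U << dabt.size; /* access size in bytes */
bool is_write = dabt.write;          /* direction of the access */
paddr_t gpa = info->gpa;             /* faulting guest physical address */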

> Would be the following acceptable?
> 1. Both fields "io_completion" and "io_req" (which seems to be the only 
> fields used in common/ioreq.c) are moved to common struct vcpu as part 
> of struct vcpu_io,
>      enum hvm_io_completion is also moved (and renamed).
> 2. We remove everything related to hvm_vcpu* from the Arm header.
> 3. x86's struct hvm_vcpu_io stays as it is (but without two fields 
> "io_completion" and "io_req").
>      I think, this way we separate a common part and reduce Xen changes 
> (which are getting bigger).

The plan looks reasonable to me.

Cheers,
Oleksandr Tyshchenko Sept. 26, 2020, 1:18 p.m. UTC | #15
On 26.09.20 16:12, Julien Grall wrote:
> Hi,

Hi Julien.


>
> On 24/09/2020 19:02, Oleksandr wrote:
>> On 24.09.20 19:02, Oleksandr wrote:
>>> On 24.09.20 14:08, Jan Beulich wrote:
>>>> On 23.09.2020 22:16, Oleksandr wrote:
>>>>> On 23.09.20 21:03, Julien Grall wrote:
>>>>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>>>>> @@ -91,6 +108,28 @@ struct arch_domain
>>>>>>>    #endif
>>>>>>>    }  __cacheline_aligned;
>>>>>>>    +enum hvm_io_completion {
>>>>>>> +    HVMIO_no_completion,
>>>>>>> +    HVMIO_mmio_completion,
>>>>>>> +    HVMIO_pio_completion
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct hvm_vcpu_io {
>>>>>>> +    /* I/O request in flight to device model. */
>>>>>>> +    enum hvm_io_completion io_completion;
>>>>>>> +    ioreq_t                io_req;
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * HVM emulation:
>>>>>>> +     *  Linear address @mmio_gla maps to MMIO physical frame
>>>>>>> @mmio_gpfn.
>>>>>>> +     *  The latter is known to be an MMIO frame (not RAM).
>>>>>>> +     *  This translation is only valid for accesses as per
>>>>>>> @mmio_access.
>>>>>>> +     */
>>>>>>> +    struct npfec        mmio_access;
>>>>>>> +    unsigned long       mmio_gla;
>>>>>>> +    unsigned long       mmio_gpfn;
>>>>>>> +};
>>>>>>> +
>>>>>> Why do we need to re-define most of this? Can't this just be in 
>>>>>> common
>>>>>> code?
>>>>> Jan asked almost the similar question in "[PATCH V1 02/16] xen/ioreq:
>>>>> Make x86's IOREQ feature common".
>>>>> Please see my answer there:
>>>>> https://patchwork.kernel.org/patch/11769105/#23637511
>>>>>
>>>>> Theoretically we could move this to the common code, but the 
>>>>> question is
>>>>> how to be with other struct fields the x86's struct hvm_vcpu_io
>>>>> has/needs but
>>>>> Arm's seems not, would it be possible to logically split struct
>>>>> hvm_vcpu_io into common and arch parts?
>>>> Have struct vcpu_io and struct arch_vcpu_io as a sub-part of it?
>>> Although it is going to pull a lot of changes into x86/hvm/*, yes 
>>> this way
>>> we indeed could logically split struct hvm_vcpu_io into common and 
>>> arch parts in a clear way.
>>> If it is really worth it, I will start looking into it.
>> Julien, I noticed that three fields mmio* are not used within current 
>> series on Arm. Do we expect them to be used later on?
>
> IIRC, I just copied them blindly when writing the PoC.
>
> The information can already be found using the HSR (syndrome 
> register), so those fields would be redundant for us.

Got it.


>
>
>> Would be the following acceptable?
>> 1. Both fields "io_completion" and "io_req" (which seems to be the 
>> only fields used in common/ioreq.c) are moved to common struct vcpu 
>> as part of struct vcpu_io,
>>      enum hvm_io_completion is also moved (and renamed).
>> 2. We remove everything related to hvm_vcpu* from the Arm header.
>> 3. x86's struct hvm_vcpu_io stays as it is (but without two fields 
>> "io_completion" and "io_req").
>>      I think, this way we separate a common part and reduce Xen 
>> changes (which are getting bigger).
>
> The plan looks reasonable to me.

OK, will follow it. Thank you
Julien Grall Sept. 26, 2020, 1:21 p.m. UTC | #16
Hi Oleksandr,

On 24/09/2020 19:22, Oleksandr wrote:
> On 24.09.20 20:25, Julien Grall wrote:
>> On 23/09/2020 21:16, Oleksandr wrote:
>>> On 23.09.20 21:03, Julien Grall wrote:
>>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>> Could you please clarify how this patch could be split in smaller one?
>>
>> This patch is going to be reduced a fair bit if you make some of the 
>> structure common. The next steps would be to move anything that is not 
>> directly related to IOREQ out.
> 
> 
> Thank you for the clarification.
> Yes, however, I believed everything in this patch is directly related to 
> IOREQ...
> 
> 
>>
>>
>> From a quick look, there are few things that can be moved in separate 
>> patches:
>>    - The addition of the ASSERT_UNREACHABLE()
> 
> Did you mean the addition of the ASSERT_UNREACHABLE() to 
> arch_handle_hvm_io_completion/handle_pio can moved to separate patches?
> Sorry, I don't quite understand, for what benefit?

Sorry, I didn't realize there were multiple ASSERT_UNREACHABLE()s in the 
code. I was referring to the one in the following chunk:

@@ -1955,9 +1959,14 @@ static void do_trap_stage2_abort_guest(struct 
cpu_user_regs *regs,
              case IO_HANDLED:
                  advance_pc(regs, hsr);
                  return;
+            case IO_RETRY:
+                /* finish later */
+                return;
              case IO_UNHANDLED:
                  /* IO unhandled, try another way to handle it. */
                  break;
+            default:
+                ASSERT_UNREACHABLE();
              }
          }

While I understand the reason this was added, to me this doesn't seem to 
be directly related to this patch.

In fact, the switch is done on an enum. So without the default, the 
compiler is able to notice when we add a new enumerator. With this new 
approach, you would only notice at runtime (assuming the path is 
exercised).

So what do we gain?
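
A standalone illustration of the point (plain C, not Xen code):

#include <stdio.h>

enum io_state { IO_ABORT, IO_HANDLED, IO_UNHANDLED, IO_RETRY };

/*
 * Built with -Wall (which enables -Wswitch), the compiler flags this
 * switch as soon as a new enumerator is added to io_state but not
 * handled. Adding a "default:" arm silences that warning, so a missing
 * case would only be caught at runtime, and only in debug builds.
 */
static void handle(enum io_state s)
{
    switch ( s )
    {
    case IO_ABORT:
    case IO_HANDLED:
        puts("done");
        break;
    case IO_UNHANDLED:
    case IO_RETRY:
        puts("try another way / retry");
        break;
    }
}

int main(void)
{
    handle(IO_HANDLED);
    return 0;
}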

[...]

>> I think Jan made some suggestion today. Let me know if you require 
>> more input.
> 
> 
> Yes. I am considering this now. I provided my thoughts on that a little 
> bit earlier. Could you please clarify there.

I have replied to it.

Cheers,
Oleksandr Tyshchenko Sept. 26, 2020, 2:57 p.m. UTC | #17
On 26.09.20 16:21, Julien Grall wrote:
> Hi Oleksandr,

Hi Julien.


>
> On 24/09/2020 19:22, Oleksandr wrote:
>> On 24.09.20 20:25, Julien Grall wrote:
>>> On 23/09/2020 21:16, Oleksandr wrote:
>>>> On 23.09.20 21:03, Julien Grall wrote:
>>>>> On 10/09/2020 21:22, Oleksandr Tyshchenko wrote:
>>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>>> Could you please clarify how this patch could be split in smaller one?
>>>
>>> This patch is going to be reduced a fair bit if you make some of the 
>>> structure common. The next steps would be to move anything that is 
>>> not directly related to IOREQ out.
>>
>>
>> Thank you for the clarification.
>> Yes, however, I believed everything in this patch is directly related 
>> to IOREQ...
>>
>>
>>>
>>>
>>> From a quick look, there are few things that can be moved in 
>>> separate patches:
>>>    - The addition of the ASSERT_UNREACHABLE()
>>
>> Did you mean the addition of the ASSERT_UNREACHABLE() to 
>> arch_handle_hvm_io_completion/handle_pio can moved to separate patches?
>> Sorry, I don't quite understand, for what benefit?
>
> Sorry I didn't realize there was multiple ASSERT_UNREACHABLE() in the 
> code. I was referring to the one in the follow chunk:
>
> @@ -1955,9 +1959,14 @@ static void do_trap_stage2_abort_guest(struct 
> cpu_user_regs *regs,
>              case IO_HANDLED:
>                  advance_pc(regs, hsr);
>                  return;
> +            case IO_RETRY:
> +                /* finish later */
> +                return;
>              case IO_UNHANDLED:
>                  /* IO unhandled, try another way to handle it. */
>                  break;
> +            default:
> +                ASSERT_UNREACHABLE();
>              }
>          }
>
> While I understand the reason this was added, to me this doesn't seem 
> to be directly related to this patch.
>
> In fact, the switch case will be done on an enum. So without the 
> default, the compiler will be able to notice if we are adding a new 
> field. With this new approach, you would only notice at runtime 
> (assuming the path is exercised).
>
> So what do we gain?

Hmm, now I doubt whether we really need to put ASSERT_UNREACHABLE() 
here. Also, we would only notice it at runtime, and only in debug builds.


>
> [...]
>
>>> I think Jan made some suggestion today. Let me know if you require 
>>> more input.
>>
>>
>> Yes. I am considering this now. I provided my thoughts on that a 
>> little bit earlier. Could you please clarify there.
>
> I have replied to it.

Thank you.

Patch

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2777388..8264cd6 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -21,6 +21,7 @@  config ARM
 	select HAS_PASSTHROUGH
 	select HAS_PDX
 	select IOMMU_FORCE_PT_SHARE
+	select IOREQ_SERVER
 
 config ARCH_DEFCONFIG
 	string
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 7e82b21..617fa3e 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -13,6 +13,7 @@  obj-y += cpuerrata.o
 obj-y += cpufeature.o
 obj-y += decode.o
 obj-y += device.o
+obj-$(CONFIG_IOREQ_SERVER) += dm.o
 obj-y += domain.o
 obj-y += domain_build.init.o
 obj-y += domctl.o
@@ -27,6 +28,7 @@  obj-y += guest_atomics.o
 obj-y += guest_walk.o
 obj-y += hvm.o
 obj-y += io.o
+obj-$(CONFIG_IOREQ_SERVER) += ioreq.o
 obj-y += irq.o
 obj-y += kernel.init.o
 obj-$(CONFIG_LIVEPATCH) += livepatch.o
diff --git a/xen/arch/arm/dm.c b/xen/arch/arm/dm.c
new file mode 100644
index 0000000..eb20344
--- /dev/null
+++ b/xen/arch/arm/dm.c
@@ -0,0 +1,33 @@ 
+/*
+ * Copyright (c) 2019 Arm ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/hypercall.h>
+
+int arch_dm_op(struct xen_dm_op *op, struct domain *d,
+               const struct dmop_args *op_args, bool *const_op)
+{
+    return -EOPNOTSUPP;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 3116932..043db3f 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -14,6 +14,7 @@ 
 #include <xen/grant_table.h>
 #include <xen/hypercall.h>
 #include <xen/init.h>
+#include <xen/ioreq.h>
 #include <xen/lib.h>
 #include <xen/livepatch.h>
 #include <xen/sched.h>
@@ -681,6 +682,10 @@  int arch_domain_create(struct domain *d,
 
     ASSERT(config != NULL);
 
+#ifdef CONFIG_IOREQ_SERVER
+    hvm_ioreq_init(d);
+#endif
+
     /* p2m_init relies on some value initialized by the IOMMU subsystem */
     if ( (rc = iommu_domain_init(d, config->iommu_opts)) != 0 )
         goto fail;
@@ -999,6 +1004,10 @@  int domain_relinquish_resources(struct domain *d)
         if (ret )
             return ret;
 
+#ifdef CONFIG_IOREQ_SERVER
+        hvm_destroy_all_ioreq_servers(d);
+#endif
+
     PROGRESS(xen):
         ret = relinquish_memory(d, &d->xenpage_list);
         if ( ret )
diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
index ae7ef96..adc9de7 100644
--- a/xen/arch/arm/io.c
+++ b/xen/arch/arm/io.c
@@ -16,6 +16,7 @@ 
  * GNU General Public License for more details.
  */
 
+#include <xen/ioreq.h>
 #include <xen/lib.h>
 #include <xen/spinlock.h>
 #include <xen/sched.h>
@@ -123,7 +124,15 @@  enum io_state try_handle_mmio(struct cpu_user_regs *regs,
 
     handler = find_mmio_handler(v->domain, info.gpa);
     if ( !handler )
-        return IO_UNHANDLED;
+    {
+        int rc;
+
+        rc = try_fwd_ioserv(regs, v, &info);
+        if ( rc == IO_HANDLED )
+            return handle_ioserv(regs, v);
+
+        return rc;
+    }
 
     /* All the instructions used on emulated MMIO region should be valid */
     if ( !dabt.valid )
diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c
new file mode 100644
index 0000000..e493c5b
--- /dev/null
+++ b/xen/arch/arm/ioreq.c
@@ -0,0 +1,142 @@ 
+/*
+ * arm/ioreq.c: hardware virtual machine I/O emulation
+ *
+ * Copyright (c) 2019 Arm ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/domain.h>
+#include <xen/ioreq.h>
+
+#include <public/hvm/ioreq.h>
+
+#include <asm/traps.h>
+
+enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    const union hsr hsr = { .bits = regs->hsr };
+    const struct hsr_dabt dabt = hsr.dabt;
+    /* Code is similar to handle_read */
+    uint8_t size = (1 << dabt.size) * 8;
+    register_t r = v->arch.hvm.hvm_io.io_req.data;
+
+    /* We are done with the IO */
+    v->arch.hvm.hvm_io.io_req.state = STATE_IOREQ_NONE;
+
+    /* XXX: Do we need to take care of write here ? */
+    if ( dabt.write )
+        return IO_HANDLED;
+
+    /*
+     * Sign extend if required.
+     * Note that we expect the read handler to have zeroed the bits
+     * outside the requested access size.
+     */
+    if ( dabt.sign && (r & (1UL << (size - 1))) )
+    {
+        /*
+         * We are relying on register_t using the same as
+         * an unsigned long in order to keep the 32-bit assembly
+         * code smaller.
+         */
+        BUILD_BUG_ON(sizeof(register_t) != sizeof(unsigned long));
+        r |= (~0UL) << size;
+    }
+
+    set_user_reg(regs, dabt.reg, r);
+
+    return IO_HANDLED;
+}
+
+enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
+                             struct vcpu *v, mmio_info_t *info)
+{
+    struct hvm_vcpu_io *vio = &v->arch.hvm.hvm_io;
+    ioreq_t p = {
+        .type = IOREQ_TYPE_COPY,
+        .addr = info->gpa,
+        .size = 1 << info->dabt.size,
+        .count = 1,
+        .dir = !info->dabt.write,
+        /*
+         * On x86, df is used by 'rep' instruction to tell the direction
+         * to iterate (forward or backward).
+         * On Arm, all the accesses to MMIO region will do a single
+         * memory access. So for now, we can safely always set to 0.
+         */
+        .df = 0,
+        .data = get_user_reg(regs, info->dabt.reg),
+        .state = STATE_IOREQ_READY,
+    };
+    struct hvm_ioreq_server *s = NULL;
+    enum io_state rc;
+
+    switch ( vio->io_req.state )
+    {
+    case STATE_IOREQ_NONE:
+        break;
+
+    case STATE_IORESP_READY:
+        return IO_HANDLED;
+
+    default:
+        gdprintk(XENLOG_ERR, "wrong state %u\n", vio->io_req.state);
+        return IO_ABORT;
+    }
+
+    s = hvm_select_ioreq_server(v->domain, &p);
+    if ( !s )
+        return IO_UNHANDLED;
+
+    if ( !info->dabt.valid )
+        return IO_ABORT;
+
+    vio->io_req = p;
+
+    rc = hvm_send_ioreq(s, &p, 0);
+    if ( rc != IO_RETRY || v->domain->is_shutting_down )
+        vio->io_req.state = STATE_IOREQ_NONE;
+    else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
+        rc = IO_HANDLED;
+    else
+        vio->io_completion = HVMIO_mmio_completion;
+
+    return rc;
+}
+
+bool ioreq_handle_complete_mmio(void)
+{
+    struct vcpu *v = current;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    const union hsr hsr = { .bits = regs->hsr };
+    paddr_t addr = v->arch.hvm.hvm_io.io_req.addr;
+
+    if ( try_handle_mmio(regs, hsr, addr) == IO_HANDLED )
+    {
+        advance_pc(regs, hsr);
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 8f40d0e..121942c 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -21,6 +21,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/init.h>
 #include <xen/iocap.h>
+#include <xen/ioreq.h>
 #include <xen/irq.h>
 #include <xen/lib.h>
 #include <xen/mem_access.h>
@@ -1384,6 +1385,9 @@  static arm_hypercall_t arm_hypercall_table[] = {
 #ifdef CONFIG_HYPFS
     HYPERCALL(hypfs_op, 5),
 #endif
+#ifdef CONFIG_IOREQ_SERVER
+    HYPERCALL(dm_op, 3),
+#endif
 };
 
 #ifndef NDEBUG
@@ -1955,9 +1959,14 @@  static void do_trap_stage2_abort_guest(struct cpu_user_regs *regs,
             case IO_HANDLED:
                 advance_pc(regs, hsr);
                 return;
+            case IO_RETRY:
+                /* finish later */
+                return;
             case IO_UNHANDLED:
                 /* IO unhandled, try another way to handle it. */
                 break;
+            default:
+                ASSERT_UNREACHABLE();
             }
         }
 
@@ -2249,12 +2258,23 @@  static void check_for_pcpu_work(void)
  * Process pending work for the vCPU. Any call should be fast or
  * implement preemption.
  */
-static void check_for_vcpu_work(void)
+static bool check_for_vcpu_work(void)
 {
     struct vcpu *v = current;
 
+#ifdef CONFIG_IOREQ_SERVER
+    bool handled;
+
+    local_irq_enable();
+    handled = handle_hvm_io_completion(v);
+    local_irq_disable();
+
+    if ( !handled )
+        return true;
+#endif
+
     if ( likely(!v->arch.need_flush_to_ram) )
-        return;
+        return false;
 
     /*
      * Give a chance for the pCPU to process work before handling the vCPU
@@ -2265,6 +2285,8 @@  static void check_for_vcpu_work(void)
     local_irq_enable();
     p2m_flush_vm(v);
     local_irq_disable();
+
+    return false;
 }
 
 /*
@@ -2277,8 +2299,10 @@  void leave_hypervisor_to_guest(void)
 {
     local_irq_disable();
 
-    check_for_vcpu_work();
-    check_for_pcpu_work();
+    do
+    {
+        check_for_pcpu_work();
+    } while ( check_for_vcpu_work() );
 
     vgic_sync_to_lrs();
 
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 6819a3b..d1c48d7 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -11,10 +11,27 @@ 
 #include <asm/vgic.h>
 #include <asm/vpl011.h>
 #include <public/hvm/params.h>
+#include <public/hvm/dm_op.h>
+#include <public/hvm/ioreq.h>
+
+#define MAX_NR_IOREQ_SERVERS 8
 
 struct hvm_domain
 {
     uint64_t              params[HVM_NR_PARAMS];
+
+    /* Guest page range used for non-default ioreq servers */
+    struct {
+        unsigned long base;
+        unsigned long mask;
+        unsigned long legacy_mask; /* indexed by HVM param number */
+    } ioreq_gfn;
+
+    /* Lock protects all other values in the sub-struct and the default */
+    struct {
+        spinlock_t              lock;
+        struct hvm_ioreq_server *server[MAX_NR_IOREQ_SERVERS];
+    } ioreq_server;
 };
 
 #ifdef CONFIG_ARM_64
@@ -91,6 +108,28 @@  struct arch_domain
 #endif
 }  __cacheline_aligned;
 
+enum hvm_io_completion {
+    HVMIO_no_completion,
+    HVMIO_mmio_completion,
+    HVMIO_pio_completion
+};
+
+struct hvm_vcpu_io {
+    /* I/O request in flight to device model. */
+    enum hvm_io_completion io_completion;
+    ioreq_t                io_req;
+
+    /*
+     * HVM emulation:
+     *  Linear address @mmio_gla maps to MMIO physical frame @mmio_gpfn.
+     *  The latter is known to be an MMIO frame (not RAM).
+     *  This translation is only valid for accesses as per @mmio_access.
+     */
+    struct npfec        mmio_access;
+    unsigned long       mmio_gla;
+    unsigned long       mmio_gpfn;
+};
+
 struct arch_vcpu
 {
     struct {
@@ -204,6 +243,11 @@  struct arch_vcpu
      */
     bool need_flush_to_ram;
 
+    struct hvm_vcpu
+    {
+        struct hvm_vcpu_io hvm_io;
+    } hvm;
+
 }  __cacheline_aligned;
 
 void vcpu_show_execution_state(struct vcpu *);
@@ -262,6 +306,8 @@  static inline void arch_vcpu_block(struct vcpu *v) {}
 
 #define arch_vm_assist_valid_mask(d) (1UL << VMASST_TYPE_runstate_update_flag)
 
+#define has_vpci(d)    ({ (void)(d); false; })
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/include/asm-arm/hvm/ioreq.h b/xen/include/asm-arm/hvm/ioreq.h
new file mode 100644
index 0000000..1c34df0
--- /dev/null
+++ b/xen/include/asm-arm/hvm/ioreq.h
@@ -0,0 +1,108 @@ 
+/*
+ * hvm.h: Hardware virtual machine assist interface definitions.
+ *
+ * Copyright (c) 2016 Citrix Systems Inc.
+ * Copyright (c) 2019 Arm ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ARM_HVM_IOREQ_H__
+#define __ASM_ARM_HVM_IOREQ_H__
+
+#include <public/hvm/ioreq.h>
+#include <public/hvm/dm_op.h>
+
+#ifdef CONFIG_IOREQ_SERVER
+enum io_state handle_ioserv(struct cpu_user_regs *regs, struct vcpu *v);
+enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
+                             struct vcpu *v, mmio_info_t *info);
+#else
+static inline enum io_state handle_ioserv(struct cpu_user_regs *regs,
+                                          struct vcpu *v)
+{
+    return IO_UNHANDLED;
+}
+
+static inline enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
+                                           struct vcpu *v, mmio_info_t *info)
+{
+    return IO_UNHANDLED;
+}
+#endif
+
+bool ioreq_handle_complete_mmio(void);
+
+static inline bool handle_pio(uint16_t port, unsigned int size, int dir)
+{
+    /*
+     * TODO: For Arm64, the main user will be PCI. So this should be
+     * implemented when we add support for vPCI.
+     */
+    BUG();
+    return true;
+}
+
+static inline int arch_hvm_destroy_ioreq_server(struct hvm_ioreq_server *s)
+{
+    return 0;
+}
+
+static inline void msix_write_completion(struct vcpu *v)
+{
+}
+
+static inline bool arch_handle_hvm_io_completion(
+    enum hvm_io_completion io_completion)
+{
+    ASSERT_UNREACHABLE();
+}
+
+static inline int hvm_get_ioreq_server_range_type(struct domain *d,
+                                                  ioreq_t *p,
+                                                  uint8_t *type,
+                                                  uint64_t *addr)
+{
+    if ( p->type != IOREQ_TYPE_COPY && p->type != IOREQ_TYPE_PIO )
+        return -EINVAL;
+
+    *type = (p->type == IOREQ_TYPE_PIO) ?
+             XEN_DMOP_IO_RANGE_PORT : XEN_DMOP_IO_RANGE_MEMORY;
+    *addr = p->addr;
+
+    return 0;
+}
+
+static inline void arch_hvm_ioreq_init(struct domain *d)
+{
+}
+
+static inline void arch_hvm_ioreq_destroy(struct domain *d)
+{
+}
+
+#define IOREQ_IO_HANDLED     IO_HANDLED
+#define IOREQ_IO_UNHANDLED   IO_UNHANDLED
+#define IOREQ_IO_RETRY       IO_RETRY
+
+#endif /* __ASM_ARM_HVM_IOREQ_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-arm/mmio.h b/xen/include/asm-arm/mmio.h
index 8dbfb27..7ab873c 100644
--- a/xen/include/asm-arm/mmio.h
+++ b/xen/include/asm-arm/mmio.h
@@ -37,6 +37,7 @@  enum io_state
     IO_ABORT,       /* The IO was handled by the helper and led to an abort. */
     IO_HANDLED,     /* The IO was successfully handled by the helper. */
     IO_UNHANDLED,   /* The IO was not handled by the helper. */
+    IO_RETRY,       /* Retry the emulation for some reason */
 };
 
 typedef int (*mmio_read_t)(struct vcpu *v, mmio_info_t *info,
diff --git a/xen/include/asm-arm/paging.h b/xen/include/asm-arm/paging.h
index 6d1a000..0550c55 100644
--- a/xen/include/asm-arm/paging.h
+++ b/xen/include/asm-arm/paging.h
@@ -4,6 +4,10 @@ 
 #define paging_mode_translate(d)              (1)
 #define paging_mode_external(d)               (1)
 
+static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
+{
+}
+
 #endif /* XEN_PAGING_H */
 
 /*