diff mbox series

[v7,02/15] s390x: protvirt: Support unpack facility

Message ID 20200309112206.20261-3-frankja@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390x: Protected Virtualization support | expand

Commit Message

Janosch Frank March 9, 2020, 11:21 a.m. UTC
The unpack facility provides the means to setup a protected guest. A
protected guest can not be introspected by the hypervisor or any
user/administrator of the machine it is running on.

Protected guests are encrypted at rest and need a special boot
mechanism via diag308 subcode 8 and 10.

Code 8 sets the PV specific IPLB which is retained seperately from
those set via code 5.

Code 10 is used to unpack the VM into protected memory, verify its
integrity and start it.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
to machine]
---
 hw/s390x/Makefile.objs              |   1 +
 hw/s390x/ipl.c                      |  59 ++++++++++++-
 hw/s390x/ipl.h                      |  72 ++++++++++++++--
 hw/s390x/pv.c                       | 104 +++++++++++++++++++++++
 hw/s390x/pv.h                       |  34 ++++++++
 hw/s390x/s390-virtio-ccw.c          | 127 +++++++++++++++++++++++++++-
 include/hw/s390x/s390-virtio-ccw.h  |   1 +
 target/s390x/cpu.c                  |  17 ++++
 target/s390x/cpu.h                  |   1 +
 target/s390x/cpu_features_def.inc.h |   1 +
 target/s390x/diag.c                 |  34 +++++++-
 11 files changed, 436 insertions(+), 15 deletions(-)
 create mode 100644 hw/s390x/pv.c
 create mode 100644 hw/s390x/pv.h

Comments

David Hildenbrand March 9, 2020, 1:37 p.m. UTC | #1
On 09.03.20 12:21, Janosch Frank wrote:
> The unpack facility provides the means to setup a protected guest. A
> protected guest can not be introspected by the hypervisor or any
> user/administrator of the machine it is running on.
> 
> Protected guests are encrypted at rest and need a special boot
> mechanism via diag308 subcode 8 and 10.
> 
> Code 8 sets the PV specific IPLB which is retained seperately from
> those set via code 5.
> 
> Code 10 is used to unpack the VM into protected memory, verify its
> integrity and start it.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
> to machine]

As you signed this patch off, Maybe this should rather be a Co-developed-by:

[...]

>  {
>      S390IPLState *ipl = get_ipl_device();
> @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
>  {
>      S390IPLState *ipl = get_ipl_device();
>  
> -    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) {
> +    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL ||
> +        reset_type == S390_RESET_PV) {
>          /* use CPU 0 for full resets */
>          ipl->reset_cpu_index = 0;

This looks wrong. In case of an error, you modify the registers of a
theoretically unrelated CPU.


>      } else {
> @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu)
>      cpu_physical_memory_unmap(addr, len, 1, len);
>  }
>  
> +int s390_ipl_prepare_pv_header(void)
> +{
> +    S390IPLState *ipl = get_ipl_device();
> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
> +    void *hdr = g_malloc(ipib_pv->pv_header_len);

Should there be an upper limit? The guest can allocate quite some memory
this way and theoretically crash the VM.

> +    int rc;
> +
> +    cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
> +                             ipib_pv->pv_header_len);

Shouldn't we validate if this memory is accessible at all?

> +    rc = s390_pv_set_sec_parms((uint64_t)hdr,
> +                          ipib_pv->pv_header_len);
> +    g_free(hdr);
> +    return rc;
> +}
> +
> +int s390_ipl_pv_unpack(void)
> +{
> +    int i, rc = 0;

NIT: These declarations last.

> +    S390IPLState *ipl = get_ipl_device();
> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;

use s390_ipl_get_iplb_pv() and assert that we don't get NULL?

> +
> +    for (i = 0; i < ipib_pv->num_comp; i++) {
> +        rc = s390_pv_unpack(ipib_pv->components[i].addr,
> +                            TARGET_PAGE_ALIGN(ipib_pv->components[i].size),
> +                            ipib_pv->components[i].tweak_pref);
> +        if (rc) {
> +            break;
> +        }
> +    }
> +    return rc;
> +}
> +
>  void s390_ipl_prepare_cpu(S390CPU *cpu)
>  {
>      S390IPLState *ipl = get_ipl_device();
> diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
> index d4813105db..b2ccdd9dae 100644
> --- a/hw/s390x/ipl.h
> +++ b/hw/s390x/ipl.h
> @@ -15,6 +15,24 @@
>  #include "cpu.h"
>  #include "hw/qdev-core.h"
>  
> +struct IPLBlockPVComp {
> +    uint64_t tweak_pref;
> +    uint64_t addr;
> +    uint64_t size;
> +} QEMU_PACKED;
> +typedef struct IPLBlockPVComp IPLBlockPVComp;
> +
> +struct IPLBlockPV {
> +    uint8_t  reserved18[87];    /* 0x18 */
> +    uint8_t  version;           /* 0x6f */
> +    uint32_t reserved70;        /* 0x70 */
> +    uint32_t num_comp;          /* 0x70 */
> +    uint64_t pv_header_addr;    /* 0x74 */
> +    uint64_t pv_header_len;     /* 0x7c */
> +    struct IPLBlockPVComp components[];
> +} QEMU_PACKED;
> +typedef struct IPLBlockPV IPLBlockPV;
> +
>  struct IplBlockCcw {
>      uint8_t  reserved0[85];
>      uint8_t  ssid;
> @@ -71,6 +89,7 @@ union IplParameterBlock {
>          union {
>              IplBlockCcw ccw;
>              IplBlockFcp fcp;
> +            IPLBlockPV pv;
>              IplBlockQemuScsi scsi;
>          };
>      } QEMU_PACKED;
> @@ -85,8 +104,11 @@ typedef union IplParameterBlock IplParameterBlock;
>  
>  int s390_ipl_set_loadparm(uint8_t *loadparm);
>  void s390_ipl_update_diag308(IplParameterBlock *iplb);
> +int s390_ipl_prepare_pv_header(void);
> +int s390_ipl_pv_unpack(void);
>  void s390_ipl_prepare_cpu(S390CPU *cpu);
>  IplParameterBlock *s390_ipl_get_iplb(void);
> +IplParameterBlock *s390_ipl_get_iplb_pv(void);
>  
>  enum s390_reset {
>      /* default is a reset not triggered by a CPU e.g. issued by QMP */
> @@ -94,6 +116,7 @@ enum s390_reset {
>      S390_RESET_REIPL,
>      S390_RESET_MODIFIED_CLEAR,
>      S390_RESET_LOAD_NORMAL,
> +    S390_RESET_PV,
>  };
>  void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type);
>  void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type);
> @@ -133,6 +156,7 @@ struct S390IPLState {
>      /*< private >*/
>      DeviceState parent_obj;
>      IplParameterBlock iplb;
> +    IplParameterBlock iplb_pv;
>      QemuIplParameters qipl;
>      uint64_t start_addr;
>      uint64_t compat_start_addr;
> @@ -140,6 +164,7 @@ struct S390IPLState {
>      uint64_t compat_bios_start_addr;
>      bool enforce_bios;
>      bool iplb_valid;
> +    bool iplb_valid_pv;
>      bool netboot;
>      /* reset related properties don't have to be migrated or reset */
>      enum s390_reset reset_type;
> @@ -161,9 +186,11 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong");
>  
>  #define S390_IPL_TYPE_FCP 0x00
>  #define S390_IPL_TYPE_CCW 0x02
> +#define S390_IPL_TYPE_PV 0x05
>  #define S390_IPL_TYPE_QEMU_SCSI 0xff
>  
>  #define S390_IPLB_HEADER_LEN 8
> +#define S390_IPLB_MIN_PV_LEN 148
>  #define S390_IPLB_MIN_CCW_LEN 200
>  #define S390_IPLB_MIN_FCP_LEN 384
>  #define S390_IPLB_MIN_QEMU_SCSI_LEN 200
> @@ -173,16 +200,49 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb)
>      return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock);
>  }
>  
> -static inline bool iplb_valid_ccw(IplParameterBlock *iplb)
> +static inline bool s390_ipl_pv_check_components(IplParameterBlock *iplb)

Still don't like the function name :)

ipl_valid_pv_components() ?

>  {
> -    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
> -           iplb->pbt == S390_IPL_TYPE_CCW;
> +    IPLBlockPV *ipib_pv = &iplb->pv;
> +    int i;
> +
> +    if (ipib_pv->num_comp == 0) {
> +        return false;
> +    }
> +
> +    for (i = 0; i < ipib_pv->num_comp; i++) {
> +        /* Addr must be 4k aligned */
> +        if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) {
> +            return false;
> +        }
> +
> +        /* Tweak prefix is monotonically increasing with each component */
> +        if (i < ipib_pv->num_comp - 1 &&
> +            ipib_pv->components[i].tweak_pref >=
> +            ipib_pv->components[i + 1].tweak_pref) {
> +            return false;
> +        }
> +    }
> +    return true;
>  }
>  
> -static inline bool iplb_valid_fcp(IplParameterBlock *iplb)
> +static inline bool iplb_valid(IplParameterBlock *iplb)
>  {
> -    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
> -           iplb->pbt == S390_IPL_TYPE_FCP;
> +    switch (iplb->pbt) {
> +        case S390_IPL_TYPE_FCP:
> +            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
> +                    iplb->pbt == S390_IPL_TYPE_FCP);
> +        case S390_IPL_TYPE_CCW:
> +            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
> +                    iplb->pbt == S390_IPL_TYPE_CCW);

That's a refactoring you could have split out.

> +        case S390_IPL_TYPE_PV:
> +            if(be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN ||
> +               iplb->pbt != S390_IPL_TYPE_PV) {
> +                return false;
> +            }
> +            return s390_ipl_pv_check_components(iplb);
> +    default:
> +        return false;
> +    }
>  }
>  
>  #endif
> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
> new file mode 100644
> index 0000000000..ba6409246e
> --- /dev/null
> +++ b/hw/s390x/pv.c
> @@ -0,0 +1,104 @@
> +/*
> + * Secure execution functions
> + *
> + * Copyright IBM Corp. 2020
> + * Author(s):
> + *  Janosch Frank <frankja@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +#include "qemu/osdep.h"
> +
> +#include <linux/kvm.h>
> +
> +#include "qemu/error-report.h"
> +#include "sysemu/kvm.h"
> +#include "pv.h"
> +
> +const char *cmd_names[] = {
> +    "VM_ENABLE",
> +    "VM_DISABLE",
> +    "VM_SET_SEC_PARAMS",
> +    "VM_UNPACK",
> +    "VM_VERIFY",
> +    "VM_PREP_RESET",
> +    "VM_UNSHARE_ALL",
> +};
> +
> +static int s390_pv_cmd(uint32_t cmd, void *data)
> +{
> +    int rc;
> +    struct kvm_pv_cmd pv_cmd = {
> +        .cmd = cmd,
> +        .data = (uint64_t)data,
> +    };
> +
> +    rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd);
> +    if (rc) {
> +        error_report("KVM PV command %d (%s) failed: header rc %x rrc %x "
> +                     "IOCTL rc: %d", cmd, cmd_names[cmd], pv_cmd.rc, pv_cmd.rrc,
> +                     rc);
> +    }
> +    return rc;
> +}
> +
> +static void s390_pv_cmd_exit(uint32_t cmd, void *data)
> +{
> +    int rc;
> +
> +    rc = s390_pv_cmd(cmd, data);
> +    if (rc) {
> +        exit(1);
> +    }
> +}
> +
> +int s390_pv_vm_enable(void)
> +{
> +    return s390_pv_cmd(KVM_PV_ENABLE, NULL);
> +}
> +
> +void s390_pv_vm_disable(void)
> +{
> +     s390_pv_cmd_exit(KVM_PV_DISABLE, NULL);
> +}
> +
> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
> +{
> +    struct kvm_s390_pv_sec_parm args = {
> +        .origin = origin,
> +        .length = length,
> +    };
> +
> +    return s390_pv_cmd(KVM_PV_VM_SET_SEC_PARMS, &args);
> +}
> +
> +/*
> + * Called for each component in the SE type IPL parameter block 0.
> + */
> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak)
> +{
> +    struct kvm_s390_pv_unp args = {
> +        .addr = addr,
> +        .size = size,
> +        .tweak = tweak,
> +    };
> +
> +    return s390_pv_cmd(KVM_PV_VM_UNPACK, &args);
> +}
> +
> +void s390_pv_perf_clear_reset(void)
> +{
> +    s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL);
> +}
> +
> +int s390_pv_verify(void)
> +{
> +    return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL);
> +}
> +
> +void s390_pv_unshare(void)
> +{
> +    s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL);
> +}
> diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h
> new file mode 100644
> index 0000000000..e58fbca96a
> --- /dev/null
> +++ b/hw/s390x/pv.h
> @@ -0,0 +1,34 @@
> +/*
> + * Protected Virtualization header
> + *
> + * Copyright IBM Corp. 2020
> + * Author(s):
> + *  Janosch Frank <frankja@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +
> +#ifndef HW_S390_PV_H
> +#define HW_S390_PV_H
> +
> +#ifdef CONFIG_KVM
> +int s390_pv_vm_enable(void);
> +void s390_pv_vm_disable(void);
> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak);
> +void s390_pv_perf_clear_reset(void);
> +int s390_pv_verify(void);
> +void s390_pv_unshare(void);
> +#else
> +static inline int s390_pv_vm_enable(void) { return 0; }
> +static inline void s390_pv_vm_disable(void) {}
> +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
> +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; }
> +static inline void s390_pv_perf_clear_reset(void) {}
> +static inline int s390_pv_verify(void) { return 0; }
> +static inline void s390_pv_unshare(void) {}
> +#endif
> +
> +#endif /* HW_S390_PV_H */
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index a89cf4c129..f718cfc591 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -41,6 +41,8 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/s390x/tod.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/s390x/pv.h"
> +#include <linux/kvm.h>
>  
>  S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
>  {
> @@ -238,9 +240,11 @@ static void s390_create_sclpconsole(const char *type, Chardev *chardev)
>  static void ccw_init(MachineState *machine)
>  {
>      int ret;
> +    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
>      VirtualCssBus *css_bus;
>      DeviceState *dev;
>  
> +    ms->pv = false;
>      s390_sclp_init();
>      /* init memory + setup max page size. Required for the CPU model */
>      s390_memory_init(machine->ram);
> @@ -316,10 +320,90 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg)
>      s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
>  }
>  
> +static void s390_machine_unprotect(S390CcwMachineState *ms)
> +{
> +    CPUState *t;
> +
> +    if (ms->pv) {

This is always the case. The check for ms->pv dropped (and if other
patches require this, it is to be handled in the caller).

> +        s390_pv_vm_disable();
> +        CPU_FOREACH(t) {
> +            S390_CPU(t)->env.pv = false;
> +        }
> +        ms->pv = false;
> +    }
> +}
> +
> +static int s390_machine_protect(S390CcwMachineState *ms)
> +{
> +    CPUState *t;
> +    int rc;
> +
> +    /* Create SE VM */
> +    rc = s390_pv_vm_enable();
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    CPU_FOREACH(t) {
> +        S390_CPU(t)->env.pv = true;
> +    }
> +    ms->pv = true;
> +
> +    /* Set SE header and unpack */
> +    rc = s390_ipl_prepare_pv_header();
> +    if (rc) {
> +        goto out_err;
> +    }
> +
> +    /* Decrypt image */
> +    rc = s390_ipl_pv_unpack();
> +    if (rc) {
> +        goto out_err;
> +    }
> +
> +    /* Verify integrity */
> +    rc = s390_pv_verify();
> +    if (rc) {
> +        goto out_err;
> +    }
> +    return rc;
> +
> +out_err:
> +    s390_machine_unprotect(ms);
> +    return rc;
> +}
> +
> +#define DIAG_308_RC_INVAL_FOR_PV    0x0a02
> +static void s390_machine_inject_pv_error(CPUState *cs)
> +{
> +    int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4;
> +    CPUS390XState *env = &S390_CPU(cs)->env;
> +
> +    /* Report that we are unable to enter protected mode */
> +    env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
> +}
> +
> +static void s390_pv_prepare_reset(CPUS390XState *env)
> +{
> +    CPUState *cs;
> +
> +    if (!env->pv) {
> +        return;
> +    }
> +    CPU_FOREACH(cs) {
> +        s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs));
> +    }

This is done by the CPU reset in all cases? Why is that needed? (->comment)

> +    s390_pv_unshare();
> +    s390_pv_perf_clear_reset();
> +}
> +
>  static void s390_machine_reset(MachineState *machine)
>  {
>      enum s390_reset reset_type;
>      CPUState *cs, *t;
> +    S390CPU *cpu;
> +    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);

Nit: Move this to the very top.

> +    CPUS390XState *env;
>  
>      /* get the reset parameters, reset them once done */
>      s390_ipl_get_reset_request(&cs, &reset_type);
> @@ -327,9 +411,16 @@ static void s390_machine_reset(MachineState *machine)
>      /* all CPUs are paused and synchronized at this point */
>      s390_cmma_reset();
>  
> +    cpu = S390_CPU(cs);
> +    env = &cpu->env;

Can you just pass "cpu" to s390_pv_prepare_reset() and handle it in there?

> +
>      switch (reset_type) {
>      case S390_RESET_EXTERNAL:
>      case S390_RESET_REIPL:
> +        if (ms->pv) {
> +            s390_machine_unprotect(ms);
> +        }
> +
>          qemu_devices_reset();
>          s390_crypto_reset();
>  
> @@ -337,22 +428,52 @@ static void s390_machine_reset(MachineState *machine)
>          run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL);
>          break;
>      case S390_RESET_MODIFIED_CLEAR:
> +        /*
> +         * Susbsystem reset needs to be done before we unshare memory
> +         * and loose access to VIRTIO structures in guest memory.
> +         */
> +        subsystem_reset();
> +        s390_crypto_reset();
> +        s390_pv_prepare_reset(env);
>          CPU_FOREACH(t) {
>              run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
>          }
> -        subsystem_reset();
> -        s390_crypto_reset();
>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>          break;
>      case S390_RESET_LOAD_NORMAL:
> +        /*
> +         * Susbsystem reset needs to be done before we unshare memory
> +         * and loose access to VIRTIO structures in guest memory.
> +         */
> +        subsystem_reset();
> +        s390_pv_prepare_reset(env);
>          CPU_FOREACH(t) {
>              if (t == cs) {
>                  continue;
>              }
>              run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL);
>          }
> -        subsystem_reset();
>          run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL);
> +        run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
> +        break;
> +    case S390_RESET_PV: /* Subcode 10 */
> +        subsystem_reset();
> +        s390_crypto_reset();
> +
> +        CPU_FOREACH(t) {
> +            if (t == cs) {
> +                continue;
> +            }
> +            run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
> +        }
> +        run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL);
> +
> +        if (s390_machine_protect(ms)) {
> +            s390_machine_inject_pv_error(cs);
> +            s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
> +            return;
> +        }
> +
>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);

[...]

>  
> +#if !defined(CONFIG_USER_ONLY)
> +static bool machine_is_pv(MachineState *ms)
> +{
> +    Object *obj;
> +
> +    /* we have to bail out for the "none" machine */
> +    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
> +     if (!obj) {
> +        return false;
> +    }
> +    return S390_CCW_MACHINE(obj)->pv;

Maybe you want to cache the machine, so you can avoid the
lookup+conversion on every new CPU.

> +}
> +#endif

[...]
>  static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr,
>                                uintptr_t ra, bool write)
>  {
> +    /* Handled by the Ultravisor */
> +    if (env->pv) {
> +        return 0;
> +    }
>      if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) {
>          s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>          return -1;
> @@ -93,6 +101,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
>          return;
>      }
>  
> +    if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) {

>= DIAG308_PV_SET

> +        s390_program_interrupt(env, PGM_SPECIFICATION, ra);
> +        return;
> +    }
> +
Viktor Mihajlovski March 9, 2020, 2:28 p.m. UTC | #2
On 3/9/20 12:21 PM, Janosch Frank wrote:
> The unpack facility provides the means to setup a protected guest. A
> protected guest can not be introspected by the hypervisor or any
> user/administrator of the machine it is running on.
> 
> Protected guests are encrypted at rest and need a special boot
> mechanism via diag308 subcode 8 and 10.
> 
> Code 8 sets the PV specific IPLB which is retained seperately from
> those set via code 5.
> 
> Code 10 is used to unpack the VM into protected memory, verify its
> integrity and start it.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
> to machine]
A nit: [Changes...] should go before the s-o-b.
> ---
>   hw/s390x/Makefile.objs              |   1 +
>   hw/s390x/ipl.c                      |  59 ++++++++++++-
>   hw/s390x/ipl.h                      |  72 ++++++++++++++--
>   hw/s390x/pv.c                       | 104 +++++++++++++++++++++++
>   hw/s390x/pv.h                       |  34 ++++++++
>   hw/s390x/s390-virtio-ccw.c          | 127 +++++++++++++++++++++++++++-
>   include/hw/s390x/s390-virtio-ccw.h  |   1 +
>   target/s390x/cpu.c                  |  17 ++++
>   target/s390x/cpu.h                  |   1 +
>   target/s390x/cpu_features_def.inc.h |   1 +
>   target/s390x/diag.c                 |  34 +++++++-
>   11 files changed, 436 insertions(+), 15 deletions(-)
>   create mode 100644 hw/s390x/pv.c
>   create mode 100644 hw/s390x/pv.h
> 
> diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
> index e02ed80b68..a46a1c7894 100644
> --- a/hw/s390x/Makefile.objs
> +++ b/hw/s390x/Makefile.objs
> @@ -31,6 +31,7 @@ obj-y += tod-qemu.o
>   obj-$(CONFIG_KVM) += tod-kvm.o
>   obj-$(CONFIG_KVM) += s390-skeys-kvm.o
>   obj-$(CONFIG_KVM) += s390-stattrib-kvm.o
> +obj-$(CONFIG_KVM) += pv.o
>   obj-y += s390-ccw.o
>   obj-y += ap-device.o
>   obj-y += ap-bridge.o
> diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
> index 9c1ecd423c..ba3eac30c6 100644
> --- a/hw/s390x/ipl.c
> +++ b/hw/s390x/ipl.c
> @@ -33,6 +33,7 @@
>   #include "qemu/cutils.h"
>   #include "qemu/option.h"
>   #include "exec/exec-all.h"
> +#include "pv.h"
> 
>   #define KERN_IMAGE_START                0x010000UL
>   #define LINUX_MAGIC_ADDR                0x010008UL
> @@ -542,11 +543,30 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb)
>   {
>       S390IPLState *ipl = get_ipl_device();
> 
> -    ipl->iplb = *iplb;
> -    ipl->iplb_valid = true;
> +    /*
> +     * The IPLB set and retrieved by subcodes 8/9 is completely
> +     * separate from the one managed via subcodes 5/6.
> +     */
> +    if (iplb->pbt == S390_IPL_TYPE_PV) {
> +        ipl->iplb_pv = *iplb;
> +        ipl->iplb_valid_pv = true;
> +    } else {
> +        ipl->iplb = *iplb;
> +        ipl->iplb_valid = true;
> +    }
>       ipl->netboot = is_virtio_net_device(iplb);
>   }
> 
> +IplParameterBlock *s390_ipl_get_iplb_pv(void)
> +{
> +    S390IPLState *ipl = get_ipl_device();
> +
> +    if (!ipl->iplb_valid_pv) {
> +        return NULL;
> +    }
> +    return &ipl->iplb_pv;
> +}
> +
>   IplParameterBlock *s390_ipl_get_iplb(void)
>   {
>       S390IPLState *ipl = get_ipl_device();
> @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
>   {
>       S390IPLState *ipl = get_ipl_device();
> 
> -    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) {
> +    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL ||
> +        reset_type == S390_RESET_PV) {
>           /* use CPU 0 for full resets */
>           ipl->reset_cpu_index = 0;
>       } else {
> @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu)
>       cpu_physical_memory_unmap(addr, len, 1, len);
>   }
> 
> +int s390_ipl_prepare_pv_header(void)
> +{
> +    S390IPLState *ipl = get_ipl_device();
> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
> +    void *hdr = g_malloc(ipib_pv->pv_header_len);
> +    int rc;
> +
> +    cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
> +                             ipib_pv->pv_header_len);
> +    rc = s390_pv_set_sec_parms((uint64_t)hdr,
> +                          ipib_pv->pv_header_len);
> +    g_free(hdr);
> +    return rc;
> +}
> +
> +int s390_ipl_pv_unpack(void)
> +{
> +    int i, rc = 0;
> +    S390IPLState *ipl = get_ipl_device();
> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
> +
> +    for (i = 0; i < ipib_pv->num_comp; i++) {
> +        rc = s390_pv_unpack(ipib_pv->components[i].addr,
> +                            TARGET_PAGE_ALIGN(ipib_pv->components[i].size),
> +                            ipib_pv->components[i].tweak_pref);
> +        if (rc) {
> +            break;
> +        }
> +    }
> +    return rc;
> +}
> +
>   void s390_ipl_prepare_cpu(S390CPU *cpu)
>   {
>       S390IPLState *ipl = get_ipl_device();
> diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
> index d4813105db..b2ccdd9dae 100644
> --- a/hw/s390x/ipl.h
> +++ b/hw/s390x/ipl.h
> @@ -15,6 +15,24 @@
>   #include "cpu.h"
>   #include "hw/qdev-core.h"
> 
> +struct IPLBlockPVComp {
> +    uint64_t tweak_pref;
> +    uint64_t addr;
> +    uint64_t size;
> +} QEMU_PACKED;
> +typedef struct IPLBlockPVComp IPLBlockPVComp;
> +
> +struct IPLBlockPV {
> +    uint8_t  reserved18[87];    /* 0x18 */
> +    uint8_t  version;           /* 0x6f */
> +    uint32_t reserved70;        /* 0x70 */
> +    uint32_t num_comp;          /* 0x70 */
Another nit: offset should be 0x74
> +    uint64_t pv_header_addr;    /* 0x74 */
> +    uint64_t pv_header_len;     /* 0x7c */
> +    struct IPLBlockPVComp components[];
> +} QEMU_PACKED;
> +typedef struct IPLBlockPV IPLBlockPV;
> +
>   struct IplBlockCcw {
>       uint8_t  reserved0[85];
>       uint8_t  ssid;[...]
Christian Borntraeger March 9, 2020, 2:40 p.m. UTC | #3
On 09.03.20 14:37, David Hildenbrand wrote:

>>  
>> +#if !defined(CONFIG_USER_ONLY)
>> +static bool machine_is_pv(MachineState *ms)
>> +{
>> +    Object *obj;
>> +
>> +    /* we have to bail out for the "none" machine */
>> +    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
>> +     if (!obj) {
>> +        return false;
>> +    }
>> +    return S390_CCW_MACHINE(obj)->pv;
> 
> Maybe you want to cache the machine, so you can avoid the
> lookup+conversion on every new CPU.
> 

something like the following?


diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index c513f8efe0..cd12c29b9a 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -185,13 +185,18 @@ static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 static bool machine_is_pv(MachineState *ms)
 {
     Object *obj;
+    static S390CcwMachineState *ccw;
+
+    if (ccw)
+           return ccw->pv;
 
     /* we have to bail out for the "none" machine */
     obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
      if (!obj) {
         return false;
     }
-    return S390_CCW_MACHINE(obj)->pv;
+    ccw = S390_CCW_MACHINE(obj);
+    return ccw->pv;
 }
 #endif
David Hildenbrand March 9, 2020, 2:42 p.m. UTC | #4
On 09.03.20 15:40, Christian Borntraeger wrote:
> 
> 
> On 09.03.20 14:37, David Hildenbrand wrote:
> 
>>>  
>>> +#if !defined(CONFIG_USER_ONLY)
>>> +static bool machine_is_pv(MachineState *ms)
>>> +{
>>> +    Object *obj;
>>> +
>>> +    /* we have to bail out for the "none" machine */
>>> +    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
>>> +     if (!obj) {
>>> +        return false;
>>> +    }
>>> +    return S390_CCW_MACHINE(obj)->pv;
>>
>> Maybe you want to cache the machine, so you can avoid the
>> lookup+conversion on every new CPU.
>>
> 
> something like the following?
> 
> 
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index c513f8efe0..cd12c29b9a 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -185,13 +185,18 @@ static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
>  static bool machine_is_pv(MachineState *ms)
>  {
>      Object *obj;
> +    static S390CcwMachineState *ccw;
> +

Move this up please (reverse christmas tree ;) )

> +    if (ccw)
> +           return ccw->pv;
>  
>      /* we have to bail out for the "none" machine */
>      obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
>       if (!obj) {
>          return false;
>      }
> -    return S390_CCW_MACHINE(obj)->pv;
> +    ccw = S390_CCW_MACHINE(obj);
> +    return ccw->pv;
>  }
>  #endif

Yeah, guess this makes sense.
Janosch Frank March 9, 2020, 2:46 p.m. UTC | #5
On 3/9/20 3:28 PM, Viktor Mihajlovski wrote:
> 
> 
> On 3/9/20 12:21 PM, Janosch Frank wrote:
>> The unpack facility provides the means to setup a protected guest. A
>> protected guest can not be introspected by the hypervisor or any
>> user/administrator of the machine it is running on.
>>
>> Protected guests are encrypted at rest and need a special boot
>> mechanism via diag308 subcode 8 and 10.
>>
>> Code 8 sets the PV specific IPLB which is retained seperately from
>> those set via code 5.
>>
>> Code 10 is used to unpack the VM into protected memory, verify its
>> integrity and start it.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
>> to machine]
> A nit: [Changes...] should go before the s-o-b.

Will be changed anyway

>> ---
>>   hw/s390x/Makefile.objs              |   1 +
>>   hw/s390x/ipl.c                      |  59 ++++++++++++-
>>   hw/s390x/ipl.h                      |  72 ++++++++++++++--
>>   hw/s390x/pv.c                       | 104 +++++++++++++++++++++++
>>   hw/s390x/pv.h                       |  34 ++++++++
>>   hw/s390x/s390-virtio-ccw.c          | 127 +++++++++++++++++++++++++++-
>>   include/hw/s390x/s390-virtio-ccw.h  |   1 +
>>   target/s390x/cpu.c                  |  17 ++++
>>   target/s390x/cpu.h                  |   1 +
>>   target/s390x/cpu_features_def.inc.h |   1 +
>>   target/s390x/diag.c                 |  34 +++++++-
>>   11 files changed, 436 insertions(+), 15 deletions(-)
>>   create mode 100644 hw/s390x/pv.c
>>   create mode 100644 hw/s390x/pv.h
>>
>> diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
>> index e02ed80b68..a46a1c7894 100644
>> --- a/hw/s390x/Makefile.objs
>> +++ b/hw/s390x/Makefile.objs
>> @@ -31,6 +31,7 @@ obj-y += tod-qemu.o
>>   obj-$(CONFIG_KVM) += tod-kvm.o
>>   obj-$(CONFIG_KVM) += s390-skeys-kvm.o
>>   obj-$(CONFIG_KVM) += s390-stattrib-kvm.o
>> +obj-$(CONFIG_KVM) += pv.o
>>   obj-y += s390-ccw.o
>>   obj-y += ap-device.o
>>   obj-y += ap-bridge.o
>> diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
>> index 9c1ecd423c..ba3eac30c6 100644
>> --- a/hw/s390x/ipl.c
>> +++ b/hw/s390x/ipl.c
>> @@ -33,6 +33,7 @@
>>   #include "qemu/cutils.h"
>>   #include "qemu/option.h"
>>   #include "exec/exec-all.h"
>> +#include "pv.h"
>>
>>   #define KERN_IMAGE_START                0x010000UL
>>   #define LINUX_MAGIC_ADDR                0x010008UL
>> @@ -542,11 +543,30 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb)
>>   {
>>       S390IPLState *ipl = get_ipl_device();
>>
>> -    ipl->iplb = *iplb;
>> -    ipl->iplb_valid = true;
>> +    /*
>> +     * The IPLB set and retrieved by subcodes 8/9 is completely
>> +     * separate from the one managed via subcodes 5/6.
>> +     */
>> +    if (iplb->pbt == S390_IPL_TYPE_PV) {
>> +        ipl->iplb_pv = *iplb;
>> +        ipl->iplb_valid_pv = true;
>> +    } else {
>> +        ipl->iplb = *iplb;
>> +        ipl->iplb_valid = true;
>> +    }
>>       ipl->netboot = is_virtio_net_device(iplb);
>>   }
>>
>> +IplParameterBlock *s390_ipl_get_iplb_pv(void)
>> +{
>> +    S390IPLState *ipl = get_ipl_device();
>> +
>> +    if (!ipl->iplb_valid_pv) {
>> +        return NULL;
>> +    }
>> +    return &ipl->iplb_pv;
>> +}
>> +
>>   IplParameterBlock *s390_ipl_get_iplb(void)
>>   {
>>       S390IPLState *ipl = get_ipl_device();
>> @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
>>   {
>>       S390IPLState *ipl = get_ipl_device();
>>
>> -    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) {
>> +    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL ||
>> +        reset_type == S390_RESET_PV) {
>>           /* use CPU 0 for full resets */
>>           ipl->reset_cpu_index = 0;
>>       } else {
>> @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu)
>>       cpu_physical_memory_unmap(addr, len, 1, len);
>>   }
>>
>> +int s390_ipl_prepare_pv_header(void)
>> +{
>> +    S390IPLState *ipl = get_ipl_device();
>> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
>> +    void *hdr = g_malloc(ipib_pv->pv_header_len);
>> +    int rc;
>> +
>> +    cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
>> +                             ipib_pv->pv_header_len);
>> +    rc = s390_pv_set_sec_parms((uint64_t)hdr,
>> +                          ipib_pv->pv_header_len);
>> +    g_free(hdr);
>> +    return rc;
>> +}
>> +
>> +int s390_ipl_pv_unpack(void)
>> +{
>> +    int i, rc = 0;
>> +    S390IPLState *ipl = get_ipl_device();
>> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
>> +
>> +    for (i = 0; i < ipib_pv->num_comp; i++) {
>> +        rc = s390_pv_unpack(ipib_pv->components[i].addr,
>> +                            TARGET_PAGE_ALIGN(ipib_pv->components[i].size),
>> +                            ipib_pv->components[i].tweak_pref);
>> +        if (rc) {
>> +            break;
>> +        }
>> +    }
>> +    return rc;
>> +}
>> +
>>   void s390_ipl_prepare_cpu(S390CPU *cpu)
>>   {
>>       S390IPLState *ipl = get_ipl_device();
>> diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
>> index d4813105db..b2ccdd9dae 100644
>> --- a/hw/s390x/ipl.h
>> +++ b/hw/s390x/ipl.h
>> @@ -15,6 +15,24 @@
>>   #include "cpu.h"
>>   #include "hw/qdev-core.h"
>>
>> +struct IPLBlockPVComp {
>> +    uint64_t tweak_pref;
>> +    uint64_t addr;
>> +    uint64_t size;
>> +} QEMU_PACKED;
>> +typedef struct IPLBlockPVComp IPLBlockPVComp;
>> +
>> +struct IPLBlockPV {
>> +    uint8_t  reserved18[87];    /* 0x18 */
>> +    uint8_t  version;           /* 0x6f */
>> +    uint32_t reserved70;        /* 0x70 */
>> +    uint32_t num_comp;          /* 0x70 */
> Another nit: offset should be 0x74

Ups

>> +    uint64_t pv_header_addr;    /* 0x74 */
>> +    uint64_t pv_header_len;     /* 0x7c */
>> +    struct IPLBlockPVComp components[];
>> +} QEMU_PACKED;
>> +typedef struct IPLBlockPV IPLBlockPV;
>> +
>>   struct IplBlockCcw {
>>       uint8_t  reserved0[85];
>>       uint8_t  ssid;[...]
Janosch Frank March 9, 2020, 3:42 p.m. UTC | #6
On 3/9/20 2:37 PM, David Hildenbrand wrote:
> On 09.03.20 12:21, Janosch Frank wrote:
>> The unpack facility provides the means to setup a protected guest. A
>> protected guest can not be introspected by the hypervisor or any
>> user/administrator of the machine it is running on.
>>
>> Protected guests are encrypted at rest and need a special boot
>> mechanism via diag308 subcode 8 and 10.
>>
>> Code 8 sets the PV specific IPLB which is retained seperately from
>> those set via code 5.
>>
>> Code 10 is used to unpack the VM into protected memory, verify its
>> integrity and start it.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
>> to machine]
> 
> As you signed this patch off, Maybe this should rather be a Co-developed-by:
> 

Ack

> [...]
> 
>>  {
>>      S390IPLState *ipl = get_ipl_device();
>> @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
>>  {
>>      S390IPLState *ipl = get_ipl_device();
>>  
>> -    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) {
>> +    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL ||
>> +        reset_type == S390_RESET_PV) {
>>          /* use CPU 0 for full resets */
>>          ipl->reset_cpu_index = 0;
> 
> This looks wrong. In case of an error, you modify the registers of a
> theoretically unrelated CPU.

It does indeed, will fix.

> 
> 
>>      } else {
>> @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu)
>>      cpu_physical_memory_unmap(addr, len, 1, len);
>>  }
>>  
>> +int s390_ipl_prepare_pv_header(void)
>> +{
>> +    S390IPLState *ipl = get_ipl_device();
>> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
>> +    void *hdr = g_malloc(ipib_pv->pv_header_len);
> 
> Should there be an upper limit? The guest can allocate quite some memory
> this way and theoretically crash the VM.

Yes, it's currently two pages max.

> 
>> +    int rc;
>> +
>> +    cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
>> +                             ipib_pv->pv_header_len);
> 
> Shouldn't we validate if this memory is accessible at all?

Yes, I moved the check into iplb_valid so we don't need to worry about
that here.

> 
>> +    rc = s390_pv_set_sec_parms((uint64_t)hdr,
>> +                          ipib_pv->pv_header_len);
>> +    g_free(hdr);
>> +    return rc;
>> +}
>> +
>> +int s390_ipl_pv_unpack(void)
>> +{
>> +    int i, rc = 0;
> 
> NIT: These declarations last.

Ack

> 
>> +    S390IPLState *ipl = get_ipl_device();
>> +    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
> 
> use s390_ipl_get_iplb_pv() and assert that we don't get NULL?

We already check for NULL in the diag code before the DIAG308_PV_START
reset is set. I've used your other suggestion though.

> 
>> +
>> +    for (i = 0; i < ipib_pv->num_comp; i++) {
>> +        rc = s390_pv_unpack(ipib_pv->components[i].addr,
>> +                            TARGET_PAGE_ALIGN(ipib_pv->components[i].size),
>> +                            ipib_pv->components[i].tweak_pref);
>> +        if (rc) {
>> +            break;
>> +        }
>> +    }
>> +    return rc;
>> +}
>> +
>>  void s390_ipl_prepare_cpu(S390CPU *cpu)
>>  {
>>      S390IPLState *ipl = get_ipl_device();
>> diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
>> index d4813105db..b2ccdd9dae 100644
>> --- a/hw/s390x/ipl.h
>> +++ b/hw/s390x/ipl.h
>> @@ -15,6 +15,24 @@
>>  #include "cpu.h"
>>  #include "hw/qdev-core.h"
>>  
>> +struct IPLBlockPVComp {
>> +    uint64_t tweak_pref;
>> +    uint64_t addr;
>> +    uint64_t size;
>> +} QEMU_PACKED;
>> +typedef struct IPLBlockPVComp IPLBlockPVComp;
>> +
>> +struct IPLBlockPV {
>> +    uint8_t  reserved18[87];    /* 0x18 */
>> +    uint8_t  version;           /* 0x6f */
>> +    uint32_t reserved70;        /* 0x70 */
>> +    uint32_t num_comp;          /* 0x70 */
>> +    uint64_t pv_header_addr;    /* 0x74 */
>> +    uint64_t pv_header_len;     /* 0x7c */
>> +    struct IPLBlockPVComp components[];
>> +} QEMU_PACKED;
>> +typedef struct IPLBlockPV IPLBlockPV;
>> +
>>  struct IplBlockCcw {
>>      uint8_t  reserved0[85];
>>      uint8_t  ssid;
>> @@ -71,6 +89,7 @@ union IplParameterBlock {
>>          union {
>>              IplBlockCcw ccw;
>>              IplBlockFcp fcp;
>> +            IPLBlockPV pv;
>>              IplBlockQemuScsi scsi;
>>          };
>>      } QEMU_PACKED;
>> @@ -85,8 +104,11 @@ typedef union IplParameterBlock IplParameterBlock;
>>  
>>  int s390_ipl_set_loadparm(uint8_t *loadparm);
>>  void s390_ipl_update_diag308(IplParameterBlock *iplb);
>> +int s390_ipl_prepare_pv_header(void);
>> +int s390_ipl_pv_unpack(void);
>>  void s390_ipl_prepare_cpu(S390CPU *cpu);
>>  IplParameterBlock *s390_ipl_get_iplb(void);
>> +IplParameterBlock *s390_ipl_get_iplb_pv(void);
>>  
>>  enum s390_reset {
>>      /* default is a reset not triggered by a CPU e.g. issued by QMP */
>> @@ -94,6 +116,7 @@ enum s390_reset {
>>      S390_RESET_REIPL,
>>      S390_RESET_MODIFIED_CLEAR,
>>      S390_RESET_LOAD_NORMAL,
>> +    S390_RESET_PV,
>>  };
>>  void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type);
>>  void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type);
>> @@ -133,6 +156,7 @@ struct S390IPLState {
>>      /*< private >*/
>>      DeviceState parent_obj;
>>      IplParameterBlock iplb;
>> +    IplParameterBlock iplb_pv;
>>      QemuIplParameters qipl;
>>      uint64_t start_addr;
>>      uint64_t compat_start_addr;
>> @@ -140,6 +164,7 @@ struct S390IPLState {
>>      uint64_t compat_bios_start_addr;
>>      bool enforce_bios;
>>      bool iplb_valid;
>> +    bool iplb_valid_pv;
>>      bool netboot;
>>      /* reset related properties don't have to be migrated or reset */
>>      enum s390_reset reset_type;
>> @@ -161,9 +186,11 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong");
>>  
>>  #define S390_IPL_TYPE_FCP 0x00
>>  #define S390_IPL_TYPE_CCW 0x02
>> +#define S390_IPL_TYPE_PV 0x05
>>  #define S390_IPL_TYPE_QEMU_SCSI 0xff
>>  
>>  #define S390_IPLB_HEADER_LEN 8
>> +#define S390_IPLB_MIN_PV_LEN 148
>>  #define S390_IPLB_MIN_CCW_LEN 200
>>  #define S390_IPLB_MIN_FCP_LEN 384
>>  #define S390_IPLB_MIN_QEMU_SCSI_LEN 200
>> @@ -173,16 +200,49 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb)
>>      return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock);
>>  }
>>  
>> -static inline bool iplb_valid_ccw(IplParameterBlock *iplb)
>> +static inline bool s390_ipl_pv_check_components(IplParameterBlock *iplb)
> 
> Still don't like the function name :)
> 
> ipl_valid_pv_components() ?

If it makes you happier

> 
>>  {
>> -    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
>> -           iplb->pbt == S390_IPL_TYPE_CCW;
>> +    IPLBlockPV *ipib_pv = &iplb->pv;
>> +    int i;
>> +
>> +    if (ipib_pv->num_comp == 0) {
>> +        return false;
>> +    }
>> +
>> +    for (i = 0; i < ipib_pv->num_comp; i++) {
>> +        /* Addr must be 4k aligned */
>> +        if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) {
>> +            return false;
>> +        }
>> +
>> +        /* Tweak prefix is monotonically increasing with each component */
>> +        if (i < ipib_pv->num_comp - 1 &&
>> +            ipib_pv->components[i].tweak_pref >=
>> +            ipib_pv->components[i + 1].tweak_pref) {
>> +            return false;
>> +        }
>> +    }
>> +    return true;
>>  }
>>  
>> -static inline bool iplb_valid_fcp(IplParameterBlock *iplb)
>> +static inline bool iplb_valid(IplParameterBlock *iplb)
>>  {
>> -    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
>> -           iplb->pbt == S390_IPL_TYPE_FCP;
>> +    switch (iplb->pbt) {
>> +        case S390_IPL_TYPE_FCP:
>> +            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
>> +                    iplb->pbt == S390_IPL_TYPE_FCP);
>> +        case S390_IPL_TYPE_CCW:
>> +            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
>> +                    iplb->pbt == S390_IPL_TYPE_CCW);
> 
> That's a refactoring you could have split out.

Sure

> 
>> +        case S390_IPL_TYPE_PV:
>> +            if(be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN ||
>> +               iplb->pbt != S390_IPL_TYPE_PV) {
>> +                return false;
>> +            }
>> +            return s390_ipl_pv_check_components(iplb);
>> +    default:
>> +        return false;
>> +    }
>>  }
>>  
>>  #endif
>> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
>> new file mode 100644
>> index 0000000000..ba6409246e
>> --- /dev/null
>> +++ b/hw/s390x/pv.c
>> @@ -0,0 +1,104 @@
>> +/*
>> + * Secure execution functions
>> + *
>> + * Copyright IBM Corp. 2020
>> + * Author(s):
>> + *  Janosch Frank <frankja@linux.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +#include "qemu/osdep.h"
>> +
>> +#include <linux/kvm.h>
>> +
>> +#include "qemu/error-report.h"
>> +#include "sysemu/kvm.h"
>> +#include "pv.h"
>> +
>> +const char *cmd_names[] = {
>> +    "VM_ENABLE",
>> +    "VM_DISABLE",
>> +    "VM_SET_SEC_PARAMS",
>> +    "VM_UNPACK",
>> +    "VM_VERIFY",
>> +    "VM_PREP_RESET",
>> +    "VM_UNSHARE_ALL",
>> +};
>> +
>> +static int s390_pv_cmd(uint32_t cmd, void *data)
>> +{
>> +    int rc;
>> +    struct kvm_pv_cmd pv_cmd = {
>> +        .cmd = cmd,
>> +        .data = (uint64_t)data,
>> +    };
>> +
>> +    rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd);
>> +    if (rc) {
>> +        error_report("KVM PV command %d (%s) failed: header rc %x rrc %x "
>> +                     "IOCTL rc: %d", cmd, cmd_names[cmd], pv_cmd.rc, pv_cmd.rrc,
>> +                     rc);
>> +    }
>> +    return rc;
>> +}
>> +
>> +static void s390_pv_cmd_exit(uint32_t cmd, void *data)
>> +{
>> +    int rc;
>> +
>> +    rc = s390_pv_cmd(cmd, data);
>> +    if (rc) {
>> +        exit(1);
>> +    }
>> +}
>> +
>> +int s390_pv_vm_enable(void)
>> +{
>> +    return s390_pv_cmd(KVM_PV_ENABLE, NULL);
>> +}
>> +
>> +void s390_pv_vm_disable(void)
>> +{
>> +     s390_pv_cmd_exit(KVM_PV_DISABLE, NULL);
>> +}
>> +
>> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
>> +{
>> +    struct kvm_s390_pv_sec_parm args = {
>> +        .origin = origin,
>> +        .length = length,
>> +    };
>> +
>> +    return s390_pv_cmd(KVM_PV_VM_SET_SEC_PARMS, &args);
>> +}
>> +
>> +/*
>> + * Called for each component in the SE type IPL parameter block 0.
>> + */
>> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak)
>> +{
>> +    struct kvm_s390_pv_unp args = {
>> +        .addr = addr,
>> +        .size = size,
>> +        .tweak = tweak,
>> +    };
>> +
>> +    return s390_pv_cmd(KVM_PV_VM_UNPACK, &args);
>> +}
>> +
>> +void s390_pv_perf_clear_reset(void)
>> +{
>> +    s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL);
>> +}
>> +
>> +int s390_pv_verify(void)
>> +{
>> +    return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL);
>> +}
>> +
>> +void s390_pv_unshare(void)
>> +{
>> +    s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL);
>> +}
>> diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h
>> new file mode 100644
>> index 0000000000..e58fbca96a
>> --- /dev/null
>> +++ b/hw/s390x/pv.h
>> @@ -0,0 +1,34 @@
>> +/*
>> + * Protected Virtualization header
>> + *
>> + * Copyright IBM Corp. 2020
>> + * Author(s):
>> + *  Janosch Frank <frankja@linux.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +
>> +#ifndef HW_S390_PV_H
>> +#define HW_S390_PV_H
>> +
>> +#ifdef CONFIG_KVM
>> +int s390_pv_vm_enable(void);
>> +void s390_pv_vm_disable(void);
>> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
>> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak);
>> +void s390_pv_perf_clear_reset(void);
>> +int s390_pv_verify(void);
>> +void s390_pv_unshare(void);
>> +#else
>> +static inline int s390_pv_vm_enable(void) { return 0; }
>> +static inline void s390_pv_vm_disable(void) {}
>> +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
>> +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; }
>> +static inline void s390_pv_perf_clear_reset(void) {}
>> +static inline int s390_pv_verify(void) { return 0; }
>> +static inline void s390_pv_unshare(void) {}
>> +#endif
>> +
>> +#endif /* HW_S390_PV_H */
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index a89cf4c129..f718cfc591 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -41,6 +41,8 @@
>>  #include "hw/qdev-properties.h"
>>  #include "hw/s390x/tod.h"
>>  #include "sysemu/sysemu.h"
>> +#include "hw/s390x/pv.h"
>> +#include <linux/kvm.h>
>>  
>>  S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
>>  {
>> @@ -238,9 +240,11 @@ static void s390_create_sclpconsole(const char *type, Chardev *chardev)
>>  static void ccw_init(MachineState *machine)
>>  {
>>      int ret;
>> +    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
>>      VirtualCssBus *css_bus;
>>      DeviceState *dev;
>>  
>> +    ms->pv = false;
>>      s390_sclp_init();
>>      /* init memory + setup max page size. Required for the CPU model */
>>      s390_memory_init(machine->ram);
>> @@ -316,10 +320,90 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg)
>>      s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
>>  }
>>  
>> +static void s390_machine_unprotect(S390CcwMachineState *ms)
>> +{
>> +    CPUState *t;
>> +
>> +    if (ms->pv) {
> 
> This is always the case. The check for ms->pv dropped (and if other
> patches require this, it is to be handled in the caller).

Ack

> 
>> +        s390_pv_vm_disable();
>> +        CPU_FOREACH(t) {
>> +            S390_CPU(t)->env.pv = false;
>> +        }
>> +        ms->pv = false;
>> +    }
>> +}
>> +
>> +static int s390_machine_protect(S390CcwMachineState *ms)
>> +{
>> +    CPUState *t;
>> +    int rc;
>> +
>> +    /* Create SE VM */
>> +    rc = s390_pv_vm_enable();
>> +    if (rc) {
>> +        return rc;
>> +    }
>> +
>> +    CPU_FOREACH(t) {
>> +        S390_CPU(t)->env.pv = true;
>> +    }
>> +    ms->pv = true;
>> +
>> +    /* Set SE header and unpack */
>> +    rc = s390_ipl_prepare_pv_header();
>> +    if (rc) {
>> +        goto out_err;
>> +    }
>> +
>> +    /* Decrypt image */
>> +    rc = s390_ipl_pv_unpack();
>> +    if (rc) {
>> +        goto out_err;
>> +    }
>> +
>> +    /* Verify integrity */
>> +    rc = s390_pv_verify();
>> +    if (rc) {
>> +        goto out_err;
>> +    }
>> +    return rc;
>> +
>> +out_err:
>> +    s390_machine_unprotect(ms);
>> +    return rc;
>> +}
>> +
>> +#define DIAG_308_RC_INVAL_FOR_PV    0x0a02
>> +static void s390_machine_inject_pv_error(CPUState *cs)
>> +{
>> +    int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4;
>> +    CPUS390XState *env = &S390_CPU(cs)->env;
>> +
>> +    /* Report that we are unable to enter protected mode */
>> +    env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
>> +}
>> +
>> +static void s390_pv_prepare_reset(CPUS390XState *env)
>> +{
>> +    CPUState *cs;
>> +
>> +    if (!env->pv) {
>> +        return;
>> +    }
>> +    CPU_FOREACH(cs) {
>> +        s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs));
>> +    }
> 
> This is done by the CPU reset in all cases? Why is that needed? (->comment)
> 
>> +    s390_pv_unshare();
>> +    s390_pv_perf_clear_reset();
>> +}
>> +
>>  static void s390_machine_reset(MachineState *machine)
>>  {
>>      enum s390_reset reset_type;
>>      CPUState *cs, *t;
>> +    S390CPU *cpu;
>> +    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
> 
> Nit: Move this to the very top.


Ack

> 
>> +    CPUS390XState *env;
>>  
>>      /* get the reset parameters, reset them once done */
>>      s390_ipl_get_reset_request(&cs, &reset_type);
>> @@ -327,9 +411,16 @@ static void s390_machine_reset(MachineState *machine)
>>      /* all CPUs are paused and synchronized at this point */
>>      s390_cmma_reset();
>>  
>> +    cpu = S390_CPU(cs);
>> +    env = &cpu->env;
> 
> Can you just pass "cpu" to s390_pv_prepare_reset() and handle it in there?

Wouldn't it make more sense to test the machine state here anyway?

> 
>> +
>>      switch (reset_type) {
>>      case S390_RESET_EXTERNAL:
>>      case S390_RESET_REIPL:
>> +        if (ms->pv) {
>> +            s390_machine_unprotect(ms);
>> +        }
>> +
>>          qemu_devices_reset();
>>          s390_crypto_reset();
>>  
>> @@ -337,22 +428,52 @@ static void s390_machine_reset(MachineState *machine)
>>          run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL);
>>          break;
>>      case S390_RESET_MODIFIED_CLEAR:
>> +        /*
>> +         * Susbsystem reset needs to be done before we unshare memory
>> +         * and loose access to VIRTIO structures in guest memory.
>> +         */
>> +        subsystem_reset();
>> +        s390_crypto_reset();
>> +        s390_pv_prepare_reset(env);
>>          CPU_FOREACH(t) {
>>              run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
>>          }
>> -        subsystem_reset();
>> -        s390_crypto_reset();
>>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>>          break;
>>      case S390_RESET_LOAD_NORMAL:
>> +        /*
>> +         * Susbsystem reset needs to be done before we unshare memory
>> +         * and loose access to VIRTIO structures in guest memory.
>> +         */
>> +        subsystem_reset();
>> +        s390_pv_prepare_reset(env);
>>          CPU_FOREACH(t) {
>>              if (t == cs) {
>>                  continue;
>>              }
>>              run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL);
>>          }
>> -        subsystem_reset();
>>          run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL);
>> +        run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>> +        break;
>> +    case S390_RESET_PV: /* Subcode 10 */
>> +        subsystem_reset();
>> +        s390_crypto_reset();
>> +
>> +        CPU_FOREACH(t) {
>> +            if (t == cs) {
>> +                continue;
>> +            }
>> +            run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
>> +        }
>> +        run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL);
>> +
>> +        if (s390_machine_protect(ms)) {
>> +            s390_machine_inject_pv_error(cs);
>> +            s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
>> +            return;
>> +        }
>> +
>>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
> 
> [...]
> 
>>  
>> +#if !defined(CONFIG_USER_ONLY)
>> +static bool machine_is_pv(MachineState *ms)
>> +{
>> +    Object *obj;
>> +
>> +    /* we have to bail out for the "none" machine */
>> +    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
>> +     if (!obj) {
>> +        return false;
>> +    }
>> +    return S390_CCW_MACHINE(obj)->pv;
> 
> Maybe you want to cache the machine, so you can avoid the
> lookup+conversion on every new CPU.

Christian has provided me with a fix to this code, I'll squash it.

> 
>> +}
>> +#endif
> 
> [...]
>>  static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr,
>>                                uintptr_t ra, bool write)
>>  {
>> +    /* Handled by the Ultravisor */
>> +    if (env->pv) {
>> +        return 0;
>> +    }
>>      if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) {
>>          s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>>          return -1;
>> @@ -93,6 +101,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
>>          return;
>>      }
>>  
>> +    if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) {
> 
>> = DIAG308_PV_SET

?

> 
>> +        s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>> +        return;
>> +    }
>> +
> 
>
David Hildenbrand March 9, 2020, 3:48 p.m. UTC | #7
>>
>>> +    CPUS390XState *env;
>>>  
>>>      /* get the reset parameters, reset them once done */
>>>      s390_ipl_get_reset_request(&cs, &reset_type);
>>> @@ -327,9 +411,16 @@ static void s390_machine_reset(MachineState *machine)
>>>      /* all CPUs are paused and synchronized at this point */
>>>      s390_cmma_reset();
>>>  
>>> +    cpu = S390_CPU(cs);
>>> +    env = &cpu->env;
>>
>> Can you just pass "cpu" to s390_pv_prepare_reset() and handle it in there?
> 
> Wouldn't it make more sense to test the machine state here anyway?

Whatever you prefer. Just saying, that introducing "CPUS390XState *env"
in this function can be avoided.

> 
>>
>>> +
>>>      switch (reset_type) {
>>>      case S390_RESET_EXTERNAL:
>>>      case S390_RESET_REIPL:
>>> +        if (ms->pv) {
>>> +            s390_machine_unprotect(ms);
>>> +        }
>>> +
>>>          qemu_devices_reset();
>>>          s390_crypto_reset();
>>>  
>>> @@ -337,22 +428,52 @@ static void s390_machine_reset(MachineState *machine)
>>>          run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL);
>>>          break;
>>>      case S390_RESET_MODIFIED_CLEAR:
>>> +        /*
>>> +         * Susbsystem reset needs to be done before we unshare memory
>>> +         * and loose access to VIRTIO structures in guest memory.
>>> +         */
>>> +        subsystem_reset();
>>> +        s390_crypto_reset();
>>> +        s390_pv_prepare_reset(env);
>>>          CPU_FOREACH(t) {
>>>              run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
>>>          }
>>> -        subsystem_reset();
>>> -        s390_crypto_reset();
>>>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>>>          break;
>>>      case S390_RESET_LOAD_NORMAL:
>>> +        /*
>>> +         * Susbsystem reset needs to be done before we unshare memory
>>> +         * and loose access to VIRTIO structures in guest memory.
>>> +         */
>>> +        subsystem_reset();
>>> +        s390_pv_prepare_reset(env);
>>>          CPU_FOREACH(t) {
>>>              if (t == cs) {
>>>                  continue;
>>>              }
>>>              run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL);
>>>          }
>>> -        subsystem_reset();
>>>          run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL);
>>> +        run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>>> +        break;
>>> +    case S390_RESET_PV: /* Subcode 10 */
>>> +        subsystem_reset();
>>> +        s390_crypto_reset();
>>> +
>>> +        CPU_FOREACH(t) {
>>> +            if (t == cs) {
>>> +                continue;
>>> +            }
>>> +            run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
>>> +        }
>>> +        run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL);
>>> +
>>> +        if (s390_machine_protect(ms)) {
>>> +            s390_machine_inject_pv_error(cs);
>>> +            s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
>>> +            return;
>>> +        }
>>> +
>>>          run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
>>
>> [...]
>>
>>>  
>>> +#if !defined(CONFIG_USER_ONLY)
>>> +static bool machine_is_pv(MachineState *ms)
>>> +{
>>> +    Object *obj;
>>> +
>>> +    /* we have to bail out for the "none" machine */
>>> +    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
>>> +     if (!obj) {
>>> +        return false;
>>> +    }
>>> +    return S390_CCW_MACHINE(obj)->pv;
>>
>> Maybe you want to cache the machine, so you can avoid the
>> lookup+conversion on every new CPU.
> 
> Christian has provided me with a fix to this code, I'll squash it.
> 
>>
>>> +}
>>> +#endif
>>
>> [...]
>>>  static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr,
>>>                                uintptr_t ra, bool write)
>>>  {
>>> +    /* Handled by the Ultravisor */
>>> +    if (env->pv) {
>>> +        return 0;
>>> +    }
>>>      if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) {
>>>          s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>>>          return -1;
>>> @@ -93,6 +101,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
>>>          return;
>>>      }
>>>  
>>> +    if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) {
>>
>>> = DIAG308_PV_SET
> 
> ?

Instead of the magic number seven, check against "subcode >= DIAG308_PV_SET"
Christian Borntraeger March 10, 2020, 9:27 a.m. UTC | #8
On 09.03.20 15:40, Christian Borntraeger wrote:
> something like the following?
> 
> 
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index c513f8efe0..cd12c29b9a 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -185,13 +185,18 @@ static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
>  static bool machine_is_pv(MachineState *ms)
>  {
>      Object *obj;
> +    static S390CcwMachineState *ccw;
> +
> +    if (ccw)
> +           return ccw->pv;

And I think we need curly braces: {}
Sorry. Was doing too many kernel things :-)
diff mbox series

Patch

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index e02ed80b68..a46a1c7894 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -31,6 +31,7 @@  obj-y += tod-qemu.o
 obj-$(CONFIG_KVM) += tod-kvm.o
 obj-$(CONFIG_KVM) += s390-skeys-kvm.o
 obj-$(CONFIG_KVM) += s390-stattrib-kvm.o
+obj-$(CONFIG_KVM) += pv.o
 obj-y += s390-ccw.o
 obj-y += ap-device.o
 obj-y += ap-bridge.o
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 9c1ecd423c..ba3eac30c6 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -33,6 +33,7 @@ 
 #include "qemu/cutils.h"
 #include "qemu/option.h"
 #include "exec/exec-all.h"
+#include "pv.h"
 
 #define KERN_IMAGE_START                0x010000UL
 #define LINUX_MAGIC_ADDR                0x010008UL
@@ -542,11 +543,30 @@  void s390_ipl_update_diag308(IplParameterBlock *iplb)
 {
     S390IPLState *ipl = get_ipl_device();
 
-    ipl->iplb = *iplb;
-    ipl->iplb_valid = true;
+    /*
+     * The IPLB set and retrieved by subcodes 8/9 is completely
+     * separate from the one managed via subcodes 5/6.
+     */
+    if (iplb->pbt == S390_IPL_TYPE_PV) {
+        ipl->iplb_pv = *iplb;
+        ipl->iplb_valid_pv = true;
+    } else {
+        ipl->iplb = *iplb;
+        ipl->iplb_valid = true;
+    }
     ipl->netboot = is_virtio_net_device(iplb);
 }
 
+IplParameterBlock *s390_ipl_get_iplb_pv(void)
+{
+    S390IPLState *ipl = get_ipl_device();
+
+    if (!ipl->iplb_valid_pv) {
+        return NULL;
+    }
+    return &ipl->iplb_pv;
+}
+
 IplParameterBlock *s390_ipl_get_iplb(void)
 {
     S390IPLState *ipl = get_ipl_device();
@@ -561,7 +581,8 @@  void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
 {
     S390IPLState *ipl = get_ipl_device();
 
-    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) {
+    if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL ||
+        reset_type == S390_RESET_PV) {
         /* use CPU 0 for full resets */
         ipl->reset_cpu_index = 0;
     } else {
@@ -635,6 +656,38 @@  static void s390_ipl_prepare_qipl(S390CPU *cpu)
     cpu_physical_memory_unmap(addr, len, 1, len);
 }
 
+int s390_ipl_prepare_pv_header(void)
+{
+    S390IPLState *ipl = get_ipl_device();
+    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
+    void *hdr = g_malloc(ipib_pv->pv_header_len);
+    int rc;
+
+    cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
+                             ipib_pv->pv_header_len);
+    rc = s390_pv_set_sec_parms((uint64_t)hdr,
+                          ipib_pv->pv_header_len);
+    g_free(hdr);
+    return rc;
+}
+
+int s390_ipl_pv_unpack(void)
+{
+    int i, rc = 0;
+    S390IPLState *ipl = get_ipl_device();
+    IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv;
+
+    for (i = 0; i < ipib_pv->num_comp; i++) {
+        rc = s390_pv_unpack(ipib_pv->components[i].addr,
+                            TARGET_PAGE_ALIGN(ipib_pv->components[i].size),
+                            ipib_pv->components[i].tweak_pref);
+        if (rc) {
+            break;
+        }
+    }
+    return rc;
+}
+
 void s390_ipl_prepare_cpu(S390CPU *cpu)
 {
     S390IPLState *ipl = get_ipl_device();
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index d4813105db..b2ccdd9dae 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -15,6 +15,24 @@ 
 #include "cpu.h"
 #include "hw/qdev-core.h"
 
+struct IPLBlockPVComp {
+    uint64_t tweak_pref;
+    uint64_t addr;
+    uint64_t size;
+} QEMU_PACKED;
+typedef struct IPLBlockPVComp IPLBlockPVComp;
+
+struct IPLBlockPV {
+    uint8_t  reserved18[87];    /* 0x18 */
+    uint8_t  version;           /* 0x6f */
+    uint32_t reserved70;        /* 0x70 */
+    uint32_t num_comp;          /* 0x70 */
+    uint64_t pv_header_addr;    /* 0x74 */
+    uint64_t pv_header_len;     /* 0x7c */
+    struct IPLBlockPVComp components[];
+} QEMU_PACKED;
+typedef struct IPLBlockPV IPLBlockPV;
+
 struct IplBlockCcw {
     uint8_t  reserved0[85];
     uint8_t  ssid;
@@ -71,6 +89,7 @@  union IplParameterBlock {
         union {
             IplBlockCcw ccw;
             IplBlockFcp fcp;
+            IPLBlockPV pv;
             IplBlockQemuScsi scsi;
         };
     } QEMU_PACKED;
@@ -85,8 +104,11 @@  typedef union IplParameterBlock IplParameterBlock;
 
 int s390_ipl_set_loadparm(uint8_t *loadparm);
 void s390_ipl_update_diag308(IplParameterBlock *iplb);
+int s390_ipl_prepare_pv_header(void);
+int s390_ipl_pv_unpack(void);
 void s390_ipl_prepare_cpu(S390CPU *cpu);
 IplParameterBlock *s390_ipl_get_iplb(void);
+IplParameterBlock *s390_ipl_get_iplb_pv(void);
 
 enum s390_reset {
     /* default is a reset not triggered by a CPU e.g. issued by QMP */
@@ -94,6 +116,7 @@  enum s390_reset {
     S390_RESET_REIPL,
     S390_RESET_MODIFIED_CLEAR,
     S390_RESET_LOAD_NORMAL,
+    S390_RESET_PV,
 };
 void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type);
 void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type);
@@ -133,6 +156,7 @@  struct S390IPLState {
     /*< private >*/
     DeviceState parent_obj;
     IplParameterBlock iplb;
+    IplParameterBlock iplb_pv;
     QemuIplParameters qipl;
     uint64_t start_addr;
     uint64_t compat_start_addr;
@@ -140,6 +164,7 @@  struct S390IPLState {
     uint64_t compat_bios_start_addr;
     bool enforce_bios;
     bool iplb_valid;
+    bool iplb_valid_pv;
     bool netboot;
     /* reset related properties don't have to be migrated or reset */
     enum s390_reset reset_type;
@@ -161,9 +186,11 @@  QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong");
 
 #define S390_IPL_TYPE_FCP 0x00
 #define S390_IPL_TYPE_CCW 0x02
+#define S390_IPL_TYPE_PV 0x05
 #define S390_IPL_TYPE_QEMU_SCSI 0xff
 
 #define S390_IPLB_HEADER_LEN 8
+#define S390_IPLB_MIN_PV_LEN 148
 #define S390_IPLB_MIN_CCW_LEN 200
 #define S390_IPLB_MIN_FCP_LEN 384
 #define S390_IPLB_MIN_QEMU_SCSI_LEN 200
@@ -173,16 +200,49 @@  static inline bool iplb_valid_len(IplParameterBlock *iplb)
     return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock);
 }
 
-static inline bool iplb_valid_ccw(IplParameterBlock *iplb)
+static inline bool s390_ipl_pv_check_components(IplParameterBlock *iplb)
 {
-    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
-           iplb->pbt == S390_IPL_TYPE_CCW;
+    IPLBlockPV *ipib_pv = &iplb->pv;
+    int i;
+
+    if (ipib_pv->num_comp == 0) {
+        return false;
+    }
+
+    for (i = 0; i < ipib_pv->num_comp; i++) {
+        /* Addr must be 4k aligned */
+        if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) {
+            return false;
+        }
+
+        /* Tweak prefix is monotonically increasing with each component */
+        if (i < ipib_pv->num_comp - 1 &&
+            ipib_pv->components[i].tweak_pref >=
+            ipib_pv->components[i + 1].tweak_pref) {
+            return false;
+        }
+    }
+    return true;
 }
 
-static inline bool iplb_valid_fcp(IplParameterBlock *iplb)
+static inline bool iplb_valid(IplParameterBlock *iplb)
 {
-    return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
-           iplb->pbt == S390_IPL_TYPE_FCP;
+    switch (iplb->pbt) {
+        case S390_IPL_TYPE_FCP:
+            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN &&
+                    iplb->pbt == S390_IPL_TYPE_FCP);
+        case S390_IPL_TYPE_CCW:
+            return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN &&
+                    iplb->pbt == S390_IPL_TYPE_CCW);
+        case S390_IPL_TYPE_PV:
+            if(be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN ||
+               iplb->pbt != S390_IPL_TYPE_PV) {
+                return false;
+            }
+            return s390_ipl_pv_check_components(iplb);
+    default:
+        return false;
+    }
 }
 
 #endif
diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
new file mode 100644
index 0000000000..ba6409246e
--- /dev/null
+++ b/hw/s390x/pv.c
@@ -0,0 +1,104 @@ 
+/*
+ * Secure execution functions
+ *
+ * Copyright IBM Corp. 2020
+ * Author(s):
+ *  Janosch Frank <frankja@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+
+#include <linux/kvm.h>
+
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "pv.h"
+
+const char *cmd_names[] = {
+    "VM_ENABLE",
+    "VM_DISABLE",
+    "VM_SET_SEC_PARAMS",
+    "VM_UNPACK",
+    "VM_VERIFY",
+    "VM_PREP_RESET",
+    "VM_UNSHARE_ALL",
+};
+
+static int s390_pv_cmd(uint32_t cmd, void *data)
+{
+    int rc;
+    struct kvm_pv_cmd pv_cmd = {
+        .cmd = cmd,
+        .data = (uint64_t)data,
+    };
+
+    rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd);
+    if (rc) {
+        error_report("KVM PV command %d (%s) failed: header rc %x rrc %x "
+                     "IOCTL rc: %d", cmd, cmd_names[cmd], pv_cmd.rc, pv_cmd.rrc,
+                     rc);
+    }
+    return rc;
+}
+
+static void s390_pv_cmd_exit(uint32_t cmd, void *data)
+{
+    int rc;
+
+    rc = s390_pv_cmd(cmd, data);
+    if (rc) {
+        exit(1);
+    }
+}
+
+int s390_pv_vm_enable(void)
+{
+    return s390_pv_cmd(KVM_PV_ENABLE, NULL);
+}
+
+void s390_pv_vm_disable(void)
+{
+     s390_pv_cmd_exit(KVM_PV_DISABLE, NULL);
+}
+
+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
+{
+    struct kvm_s390_pv_sec_parm args = {
+        .origin = origin,
+        .length = length,
+    };
+
+    return s390_pv_cmd(KVM_PV_VM_SET_SEC_PARMS, &args);
+}
+
+/*
+ * Called for each component in the SE type IPL parameter block 0.
+ */
+int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak)
+{
+    struct kvm_s390_pv_unp args = {
+        .addr = addr,
+        .size = size,
+        .tweak = tweak,
+    };
+
+    return s390_pv_cmd(KVM_PV_VM_UNPACK, &args);
+}
+
+void s390_pv_perf_clear_reset(void)
+{
+    s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL);
+}
+
+int s390_pv_verify(void)
+{
+    return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL);
+}
+
+void s390_pv_unshare(void)
+{
+    s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL);
+}
diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h
new file mode 100644
index 0000000000..e58fbca96a
--- /dev/null
+++ b/hw/s390x/pv.h
@@ -0,0 +1,34 @@ 
+/*
+ * Protected Virtualization header
+ *
+ * Copyright IBM Corp. 2020
+ * Author(s):
+ *  Janosch Frank <frankja@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PV_H
+#define HW_S390_PV_H
+
+#ifdef CONFIG_KVM
+int s390_pv_vm_enable(void);
+void s390_pv_vm_disable(void);
+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
+int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak);
+void s390_pv_perf_clear_reset(void);
+int s390_pv_verify(void);
+void s390_pv_unshare(void);
+#else
+static inline int s390_pv_vm_enable(void) { return 0; }
+static inline void s390_pv_vm_disable(void) {}
+static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
+static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; }
+static inline void s390_pv_perf_clear_reset(void) {}
+static inline int s390_pv_verify(void) { return 0; }
+static inline void s390_pv_unshare(void) {}
+#endif
+
+#endif /* HW_S390_PV_H */
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index a89cf4c129..f718cfc591 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -41,6 +41,8 @@ 
 #include "hw/qdev-properties.h"
 #include "hw/s390x/tod.h"
 #include "sysemu/sysemu.h"
+#include "hw/s390x/pv.h"
+#include <linux/kvm.h>
 
 S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
 {
@@ -238,9 +240,11 @@  static void s390_create_sclpconsole(const char *type, Chardev *chardev)
 static void ccw_init(MachineState *machine)
 {
     int ret;
+    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
     VirtualCssBus *css_bus;
     DeviceState *dev;
 
+    ms->pv = false;
     s390_sclp_init();
     /* init memory + setup max page size. Required for the CPU model */
     s390_memory_init(machine->ram);
@@ -316,10 +320,90 @@  static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg)
     s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
 }
 
+static void s390_machine_unprotect(S390CcwMachineState *ms)
+{
+    CPUState *t;
+
+    if (ms->pv) {
+        s390_pv_vm_disable();
+        CPU_FOREACH(t) {
+            S390_CPU(t)->env.pv = false;
+        }
+        ms->pv = false;
+    }
+}
+
+static int s390_machine_protect(S390CcwMachineState *ms)
+{
+    CPUState *t;
+    int rc;
+
+    /* Create SE VM */
+    rc = s390_pv_vm_enable();
+    if (rc) {
+        return rc;
+    }
+
+    CPU_FOREACH(t) {
+        S390_CPU(t)->env.pv = true;
+    }
+    ms->pv = true;
+
+    /* Set SE header and unpack */
+    rc = s390_ipl_prepare_pv_header();
+    if (rc) {
+        goto out_err;
+    }
+
+    /* Decrypt image */
+    rc = s390_ipl_pv_unpack();
+    if (rc) {
+        goto out_err;
+    }
+
+    /* Verify integrity */
+    rc = s390_pv_verify();
+    if (rc) {
+        goto out_err;
+    }
+    return rc;
+
+out_err:
+    s390_machine_unprotect(ms);
+    return rc;
+}
+
+#define DIAG_308_RC_INVAL_FOR_PV    0x0a02
+static void s390_machine_inject_pv_error(CPUState *cs)
+{
+    int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4;
+    CPUS390XState *env = &S390_CPU(cs)->env;
+
+    /* Report that we are unable to enter protected mode */
+    env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
+}
+
+static void s390_pv_prepare_reset(CPUS390XState *env)
+{
+    CPUState *cs;
+
+    if (!env->pv) {
+        return;
+    }
+    CPU_FOREACH(cs) {
+        s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs));
+    }
+    s390_pv_unshare();
+    s390_pv_perf_clear_reset();
+}
+
 static void s390_machine_reset(MachineState *machine)
 {
     enum s390_reset reset_type;
     CPUState *cs, *t;
+    S390CPU *cpu;
+    S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
+    CPUS390XState *env;
 
     /* get the reset parameters, reset them once done */
     s390_ipl_get_reset_request(&cs, &reset_type);
@@ -327,9 +411,16 @@  static void s390_machine_reset(MachineState *machine)
     /* all CPUs are paused and synchronized at this point */
     s390_cmma_reset();
 
+    cpu = S390_CPU(cs);
+    env = &cpu->env;
+
     switch (reset_type) {
     case S390_RESET_EXTERNAL:
     case S390_RESET_REIPL:
+        if (ms->pv) {
+            s390_machine_unprotect(ms);
+        }
+
         qemu_devices_reset();
         s390_crypto_reset();
 
@@ -337,22 +428,52 @@  static void s390_machine_reset(MachineState *machine)
         run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL);
         break;
     case S390_RESET_MODIFIED_CLEAR:
+        /*
+         * Susbsystem reset needs to be done before we unshare memory
+         * and loose access to VIRTIO structures in guest memory.
+         */
+        subsystem_reset();
+        s390_crypto_reset();
+        s390_pv_prepare_reset(env);
         CPU_FOREACH(t) {
             run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
         }
-        subsystem_reset();
-        s390_crypto_reset();
         run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
         break;
     case S390_RESET_LOAD_NORMAL:
+        /*
+         * Susbsystem reset needs to be done before we unshare memory
+         * and loose access to VIRTIO structures in guest memory.
+         */
+        subsystem_reset();
+        s390_pv_prepare_reset(env);
         CPU_FOREACH(t) {
             if (t == cs) {
                 continue;
             }
             run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL);
         }
-        subsystem_reset();
         run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL);
+        run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
+        break;
+    case S390_RESET_PV: /* Subcode 10 */
+        subsystem_reset();
+        s390_crypto_reset();
+
+        CPU_FOREACH(t) {
+            if (t == cs) {
+                continue;
+            }
+            run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
+        }
+        run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL);
+
+        if (s390_machine_protect(ms)) {
+            s390_machine_inject_pv_error(cs);
+            s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
+            return;
+        }
+
         run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
         break;
     default:
diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
index 8aa27199c9..cd1dccc6e3 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -28,6 +28,7 @@  typedef struct S390CcwMachineState {
     /*< public >*/
     bool aes_key_wrap;
     bool dea_key_wrap;
+    bool pv;
     uint8_t loadparm[8];
 } S390CcwMachineState;
 
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 3dd396e870..5109b8bbf5 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -37,6 +37,8 @@ 
 #include "sysemu/hw_accel.h"
 #include "hw/qdev-properties.h"
 #ifndef CONFIG_USER_ONLY
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/pv.h"
 #include "hw/boards.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/sysemu.h"
@@ -174,6 +176,20 @@  static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
     info->print_insn = print_insn_s390;
 }
 
+#if !defined(CONFIG_USER_ONLY)
+static bool machine_is_pv(MachineState *ms)
+{
+    Object *obj;
+
+    /* we have to bail out for the "none" machine */
+    obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE);
+     if (!obj) {
+        return false;
+    }
+    return S390_CCW_MACHINE(obj)->pv;
+}
+#endif
+
 static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
 {
     CPUState *cs = CPU(dev);
@@ -205,6 +221,7 @@  static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
         goto out;
     }
 
+    cpu->env.pv = machine_is_pv(ms);
     /* sync cs->cpu_index and env->core_id. The latter is needed for TCG. */
     cs->cpu_index = cpu->env.core_id;
 #endif
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 1d17709d6e..7e4d9d267c 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -114,6 +114,7 @@  struct CPUS390XState {
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
+    bool pv; /* protected virtualization */
 
 #if !defined(CONFIG_USER_ONLY)
     uint32_t core_id; /* PoP "CPU address", same as cpu_index */
diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h
index 31dff0d84e..60db28351d 100644
--- a/target/s390x/cpu_features_def.inc.h
+++ b/target/s390x/cpu_features_def.inc.h
@@ -107,6 +107,7 @@  DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility (
 DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility")
 DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)")
 DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility")
+DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility")
 
 /* Features exposed via SCLP SCCB Byte 80 - 98  (bit numbers relative to byte-80) */
 DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility")
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index b5aec06d6b..670db0eb53 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -52,6 +52,7 @@  int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3)
 #define DIAG_308_RC_OK              0x0001
 #define DIAG_308_RC_NO_CONF         0x0102
 #define DIAG_308_RC_INVALID         0x0402
+#define DIAG_308_RC_NO_PV_CONF      0x0902
 
 #define DIAG308_RESET_MOD_CLR       0
 #define DIAG308_RESET_LOAD_NORM     1
@@ -59,10 +60,17 @@  int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3)
 #define DIAG308_LOAD_NORMAL_DUMP    4
 #define DIAG308_SET                 5
 #define DIAG308_STORE               6
+#define DIAG308_PV_SET              8
+#define DIAG308_PV_STORE            9
+#define DIAG308_PV_START            10
 
 static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr,
                               uintptr_t ra, bool write)
 {
+    /* Handled by the Ultravisor */
+    if (env->pv) {
+        return 0;
+    }
     if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) {
         s390_program_interrupt(env, PGM_SPECIFICATION, ra);
         return -1;
@@ -93,6 +101,11 @@  void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
         return;
     }
 
+    if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) {
+        s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+        return;
+    }
+
     switch (subcode) {
     case DIAG308_RESET_MOD_CLR:
         s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR);
@@ -105,6 +118,7 @@  void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
         s390_ipl_reset_request(cs, S390_RESET_REIPL);
         break;
     case DIAG308_SET:
+    case DIAG308_PV_SET:
         if (diag308_parm_check(env, r1, addr, ra, false)) {
             return;
         }
@@ -117,7 +131,7 @@  void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
 
         cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len));
 
-        if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb)) {
+        if (!iplb_valid(iplb)) {
             env->regs[r1 + 1] = DIAG_308_RC_INVALID;
             goto out;
         }
@@ -128,17 +142,31 @@  out:
         g_free(iplb);
         return;
     case DIAG308_STORE:
+    case DIAG308_PV_STORE:
         if (diag308_parm_check(env, r1, addr, ra, true)) {
             return;
         }
-        iplb = s390_ipl_get_iplb();
+        if (subcode == DIAG308_PV_STORE) {
+            iplb = s390_ipl_get_iplb_pv();
+        } else {
+            iplb = s390_ipl_get_iplb();
+        }
         if (iplb) {
             cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len));
             env->regs[r1 + 1] = DIAG_308_RC_OK;
         } else {
             env->regs[r1 + 1] = DIAG_308_RC_NO_CONF;
         }
-        return;
+        break;
+    case DIAG308_PV_START:
+        iplb = s390_ipl_get_iplb_pv();
+        if (!iplb) {
+            env->regs[r1 + 1] = DIAG_308_RC_NO_PV_CONF;
+            return;
+        }
+
+        s390_ipl_reset_request(cs, S390_RESET_PV);
+        break;
     default:
         s390_program_interrupt(env, PGM_SPECIFICATION, ra);
         break;