diff mbox

[v3,5/7] s390x/vfio: ap: Introduce VFIO AP device

Message ID 1521156300-19296-6-git-send-email-akrowiak@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tony Krowiak March 15, 2018, 11:24 p.m. UTC
Introduces a VFIO based AP device. The device is defined via
the QEMU command line by specifying:

    -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>

The mediated matrix device is created by the VFIO AP device
driver by writing a UUID to a sysfs attribute file (see
docs/vfio-ap.txt). The mediated matrix device will be named
after the UUID. Symbolic links to the $uuid are created in
many places, so the path to the mediated matrix device $uuid
can be specified in any of the following ways:

/sys/devices/vfio_ap/matrix/$uuid
/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
/sys/bus/mdev/devices/$uuid
/sys/bus/mdev/drivers/vfio_mdev/$uuid

When the vfio-ap device is realized, it acquires and opens the
VFIO iommu group to which the mediated matrix device is
bound. This causes a VFIO group notification event to be
signaled. The vfio_ap device driver's group notification
handler will get called at which time the device driver
will configure the the AP devices to which the guest will
be granted access.

Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
---
 default-configs/s390x-softmmu.mak |    1 +
 hw/vfio/Makefile.objs             |    1 +
 hw/vfio/ap.c                      |  184 +++++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h     |    1 +
 4 files changed, 187 insertions(+), 0 deletions(-)
 create mode 100644 hw/vfio/ap.c

Comments

Pierre Morel March 16, 2018, 10:42 a.m. UTC | #1
On 16/03/2018 00:24, Tony Krowiak wrote:
> Introduces a VFIO based AP device. The device is defined via
> the QEMU command line by specifying:
>
>      -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>
> The mediated matrix device is created by the VFIO AP device
> driver by writing a UUID to a sysfs attribute file (see
> docs/vfio-ap.txt). The mediated matrix device will be named
> after the UUID. Symbolic links to the $uuid are created in
> many places, so the path to the mediated matrix device $uuid
> can be specified in any of the following ways:
>
> /sys/devices/vfio_ap/matrix/$uuid
> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
> /sys/bus/mdev/devices/$uuid
> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>
> When the vfio-ap device is realized, it acquires and opens the
> VFIO iommu group to which the mediated matrix device is
> bound. This causes a VFIO group notification event to be
> signaled. The vfio_ap device driver's group notification
> handler will get called at which time the device driver
> will configure the the AP devices to which the guest will
> be granted access.
>
> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
> ---
>   default-configs/s390x-softmmu.mak |    1 +
>   hw/vfio/Makefile.objs             |    1 +
>   hw/vfio/ap.c                      |  184 +++++++++++++++++++++++++++++++++++++
>   include/hw/vfio/vfio-common.h     |    1 +
>   4 files changed, 187 insertions(+), 0 deletions(-)
>   create mode 100644 hw/vfio/ap.c
>
> diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
> index 2f4bfe7..0b784b6 100644
> --- a/default-configs/s390x-softmmu.mak
> +++ b/default-configs/s390x-softmmu.mak
> @@ -9,3 +9,4 @@ CONFIG_S390_FLIC=y
>   CONFIG_S390_FLIC_KVM=$(CONFIG_KVM)
>   CONFIG_VFIO_CCW=$(CONFIG_LINUX)
>   CONFIG_WDT_DIAG288=y
> +CONFIG_VFIO_AP=$(CONFIG_LINUX)
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index c3ab909..7300860 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -6,4 +6,5 @@ obj-$(CONFIG_SOFTMMU) += platform.o
>   obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
>   obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
>   obj-$(CONFIG_SOFTMMU) += spapr.o
> +obj-$(CONFIG_VFIO_AP) += ap.o
>   endif
> diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
> new file mode 100644
> index 0000000..b397bb1
> --- /dev/null
> +++ b/hw/vfio/ap.c
> @@ -0,0 +1,184 @@
> +/*
> + * VFIO based AP matrix device assignment
> + *
> + * Copyright 2018 IBM Corp.
> + * Author(s): Tony Krowiak <akrowiak@linux.vnet.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +
> +#include <linux/vfio.h>
> +#include <sys/ioctl.h>
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "hw/sysbus.h"
> +#include "hw/vfio/vfio.h"
> +#include "hw/vfio/vfio-common.h"
> +#include "hw/s390x/ap-device.h"
> +#include "qemu/error-report.h"
> +#include "qemu/queue.h"
> +#include "cpu.h"
> +#include "kvm_s390x.h"
> +
> +#define VFIO_AP_DEVICE_TYPE      "vfio-ap"
> +
> +typedef struct VFIOAPDevice {
> +    APDevice apdev;
> +    VFIODevice vdev;
> +    QTAILQ_ENTRY(VFIOAPDevice) sibling;
> +} VFIOAPDevice;
> +
> +static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
> +{
> +    vdev->needs_reset = false;
> +}
> +
> +/*
> + * We don't need vfio_hot_reset_multi and vfio_eoi operations for
> + * vfio-ap-matrix device now.
> + */
> +struct VFIODeviceOps vfio_ap_ops = {
> +    .vfio_compute_needs_reset = vfio_ap_compute_needs_reset,
> +};
> +
> +static QTAILQ_HEAD(, VFIOAPDevice) vfio_ap_devices =
> +        QTAILQ_HEAD_INITIALIZER(vfio_ap_devices);
> +
> +static void vfio_put_device(VFIOAPDevice *apdev)
> +{
> +    g_free(apdev->vdev.name);
> +    vfio_put_base_device(&apdev->vdev);
> +}
> +
> +static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp)
> +{
> +    char *tmp, group_path[PATH_MAX];
> +    ssize_t len;
> +    int groupid;
> +
> +    tmp = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev);
> +    len = readlink(tmp, group_path, sizeof(group_path));
> +    g_free(tmp);
> +
> +    if (len <= 0 || len >= sizeof(group_path)) {
> +        error_setg(errp, "%s: no iommu_group found for %s",
> +                   VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev);
> +        return NULL;
> +    }
> +
> +    group_path[len] = 0;
> +
> +    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
> +        error_setg(errp, "vfio: failed to read %s", group_path);
> +        return NULL;
> +    }
> +
> +    return vfio_get_group(groupid, &address_space_memory, errp);
> +}
> +
> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
> +{
> +    VFIODevice *vbasedev;
> +    VFIOGroup *vfio_group;
> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
> +    char *mdevid;
> +    Error *local_err = NULL;
> +    int ret;
> +
> +    if (!s390_has_feat(S390_FEAT_AP)) {
> +        error_setg(&local_err, "AP support not enabled");
> +        goto out_err;
> +    }
> +
> +    ret = kvm_s390_set_interpret_ap(1);

If we have several devices, this is called once per device.
However it is a general switch for the VM.
It looks strange to have this inside the realize callback

> +    if (ret) {
> +        error_setg_errno(&local_err, errno,
> +                         "error setting interpretive execution of AP instructions");
> +        goto out_err;
> +    }
> +
> +    vfio_group = vfio_ap_get_group(vapdev, &local_err);
> +    if (!vfio_group) {
> +        goto out_group_err;
> +    }
> +
> +    vapdev->vdev.ops = &vfio_ap_ops;
> +    vapdev->vdev.type = VFIO_DEVICE_TYPE_AP;
> +    mdevid = basename(vapdev->vdev.sysfsdev);
> +    vapdev->vdev.name = g_strdup_printf("%s", mdevid);
> +    vapdev->vdev.dev = dev;
> +    QLIST_FOREACH(vbasedev, &vfio_group->device_list, next) {
> +        if (strcmp(vbasedev->name, vapdev->vdev.name) == 0) {
> +            error_setg(&local_err,
> +                       "%s: AP device %s has already been realized",
> +                       VFIO_AP_DEVICE_TYPE, vapdev->vdev.name);
> +            goto out_device_err;
> +        }
> +    }
> +
> +    ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err);
> +    if (ret) {
> +        goto out_device_err;
> +    }
> +
> +    QTAILQ_INSERT_TAIL(&vfio_ap_devices, vapdev, sibling);
> +
> +    return;
> +
> +
> +out_device_err:
> +    vfio_put_group(vfio_group);
> +out_group_err:
> +    kvm_s390_set_interpret_ap(0);
> +out_err:
> +    error_propagate(errp, local_err);
> +}
> +
> +static void vfio_ap_unrealize(DeviceState *dev, Error **errp)
> +{
> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
> +    VFIOGroup *group = vapdev->vdev.group;
> +
> +    vfio_put_device(vapdev);
> +    vfio_put_group(group);
> +    kvm_s390_set_interpret_ap(0);
> +}
> +
> +static Property vfio_ap_properties[] = {
> +    DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static const VMStateDescription vfio_ap_vmstate = {
> +    .name = VFIO_AP_DEVICE_TYPE,
> +    .unmigratable = 1,
> +};
> +
> +static void vfio_ap_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->props = vfio_ap_properties;
> +    dc->vmsd = &vfio_ap_vmstate;
> +    dc->desc = "VFIO-based AP device assignment";
> +    dc->realize = vfio_ap_realize;
> +    dc->unrealize = vfio_ap_unrealize;


Shouldn't we make the VFIO-AP devices unpluggable
in the case the AP device are pluggable?

> +}
> +
> +static const TypeInfo vfio_ap_info = {
> +    .name = VFIO_AP_DEVICE_TYPE,
> +    .parent = AP_DEVICE_TYPE,
> +    .instance_size = sizeof(VFIOAPDevice),
> +    .class_init = vfio_ap_class_init,
> +};
> +
> +static void register_vfio_ap_type(void)
> +{
> +    type_register_static(&vfio_ap_info);
> +}
> +
> +type_init(register_vfio_ap_type)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index f3a2ac9..f1f22d9 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -46,6 +46,7 @@ enum {
>       VFIO_DEVICE_TYPE_PCI = 0,
>       VFIO_DEVICE_TYPE_PLATFORM = 1,
>       VFIO_DEVICE_TYPE_CCW = 2,
> +    VFIO_DEVICE_TYPE_AP = 3,
>   };
>
>   typedef struct VFIOMmap {
Halil Pasic March 16, 2018, 1:22 p.m. UTC | #2
On 03/16/2018 11:42 AM, Pierre Morel wrote:
> On 16/03/2018 00:24, Tony Krowiak wrote:
>> Introduces a VFIO based AP device. The device is defined via
>> the QEMU command line by specifying:
>>
>>      -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>>
>> The mediated matrix device is created by the VFIO AP device
>> driver by writing a UUID to a sysfs attribute file (see
>> docs/vfio-ap.txt). The mediated matrix device will be named
>> after the UUID. Symbolic links to the $uuid are created in
>> many places, so the path to the mediated matrix device $uuid
>> can be specified in any of the following ways:
>>
>> /sys/devices/vfio_ap/matrix/$uuid
>> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
>> /sys/bus/mdev/devices/$uuid
>> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>>
>> When the vfio-ap device is realized, it acquires and opens the
>> VFIO iommu group to which the mediated matrix device is
>> bound. This causes a VFIO group notification event to be
>> signaled. The vfio_ap device driver's group notification
>> handler will get called at which time the device driver
>> will configure the the AP devices to which the guest will
>> be granted access.
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>> ---
[..]
>> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
>> +{
>> +    VFIODevice *vbasedev;
>> +    VFIOGroup *vfio_group;
>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>> +    char *mdevid;
>> +    Error *local_err = NULL;
>> +    int ret;
>> +
>> +    if (!s390_has_feat(S390_FEAT_AP)) {
>> +        error_setg(&local_err, "AP support not enabled");
>> +        goto out_err;
>> +    }
>> +
>> +    ret = kvm_s390_set_interpret_ap(1);
> 
> If we have several devices, this is called once per device.

I don't think having several of these in a single vm makes
any sense. Or does it? IMHO we should make sure there is at
most one device taking care of the crypto pass-through.

> However it is a general switch for the VM.
> It looks strange to have this inside the realize callback
> 

I kind of semi agree.

The previous solution (having this transparent for QEMU) had
benefits. Why did we move away from that again?

Regards,
Halil
Tony Krowiak March 16, 2018, 3 p.m. UTC | #3
On 03/16/2018 06:42 AM, Pierre Morel wrote:
> On 16/03/2018 00:24, Tony Krowiak wrote:
>> Introduces a VFIO based AP device. The device is defined via
>> the QEMU command line by specifying:
>>
>>      -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>>
>> The mediated matrix device is created by the VFIO AP device
>> driver by writing a UUID to a sysfs attribute file (see
>> docs/vfio-ap.txt). The mediated matrix device will be named
>> after the UUID. Symbolic links to the $uuid are created in
>> many places, so the path to the mediated matrix device $uuid
>> can be specified in any of the following ways:
>>
>> /sys/devices/vfio_ap/matrix/$uuid
>> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid 
>>
>> /sys/bus/mdev/devices/$uuid
>> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>>
>> When the vfio-ap device is realized, it acquires and opens the
>> VFIO iommu group to which the mediated matrix device is
>> bound. This causes a VFIO group notification event to be
>> signaled. The vfio_ap device driver's group notification
>> handler will get called at which time the device driver
>> will configure the the AP devices to which the guest will
>> be granted access.
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>> ---
>>   default-configs/s390x-softmmu.mak |    1 +
>>   hw/vfio/Makefile.objs             |    1 +
>>   hw/vfio/ap.c                      |  184 
>> +++++++++++++++++++++++++++++++++++++
>>   include/hw/vfio/vfio-common.h     |    1 +
>>   4 files changed, 187 insertions(+), 0 deletions(-)
>>   create mode 100644 hw/vfio/ap.c
>>
>> diff --git a/default-configs/s390x-softmmu.mak 
>> b/default-configs/s390x-softmmu.mak
>> index 2f4bfe7..0b784b6 100644
>> --- a/default-configs/s390x-softmmu.mak
>> +++ b/default-configs/s390x-softmmu.mak
>> @@ -9,3 +9,4 @@ CONFIG_S390_FLIC=y
>>   CONFIG_S390_FLIC_KVM=$(CONFIG_KVM)
>>   CONFIG_VFIO_CCW=$(CONFIG_LINUX)
>>   CONFIG_WDT_DIAG288=y
>> +CONFIG_VFIO_AP=$(CONFIG_LINUX)
>> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
>> index c3ab909..7300860 100644
>> --- a/hw/vfio/Makefile.objs
>> +++ b/hw/vfio/Makefile.objs
>> @@ -6,4 +6,5 @@ obj-$(CONFIG_SOFTMMU) += platform.o
>>   obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
>>   obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
>>   obj-$(CONFIG_SOFTMMU) += spapr.o
>> +obj-$(CONFIG_VFIO_AP) += ap.o
>>   endif
>> diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
>> new file mode 100644
>> index 0000000..b397bb1
>> --- /dev/null
>> +++ b/hw/vfio/ap.c
>> @@ -0,0 +1,184 @@
>> +/*
>> + * VFIO based AP matrix device assignment
>> + *
>> + * Copyright 2018 IBM Corp.
>> + * Author(s): Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 
>> or (at
>> + * your option) any later version. See the COPYING file in the 
>> top-level
>> + * directory.
>> + */
>> +
>> +#include <linux/vfio.h>
>> +#include <sys/ioctl.h>
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/vfio/vfio.h"
>> +#include "hw/vfio/vfio-common.h"
>> +#include "hw/s390x/ap-device.h"
>> +#include "qemu/error-report.h"
>> +#include "qemu/queue.h"
>> +#include "cpu.h"
>> +#include "kvm_s390x.h"
>> +
>> +#define VFIO_AP_DEVICE_TYPE      "vfio-ap"
>> +
>> +typedef struct VFIOAPDevice {
>> +    APDevice apdev;
>> +    VFIODevice vdev;
>> +    QTAILQ_ENTRY(VFIOAPDevice) sibling;
>> +} VFIOAPDevice;
>> +
>> +static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
>> +{
>> +    vdev->needs_reset = false;
>> +}
>> +
>> +/*
>> + * We don't need vfio_hot_reset_multi and vfio_eoi operations for
>> + * vfio-ap-matrix device now.
>> + */
>> +struct VFIODeviceOps vfio_ap_ops = {
>> +    .vfio_compute_needs_reset = vfio_ap_compute_needs_reset,
>> +};
>> +
>> +static QTAILQ_HEAD(, VFIOAPDevice) vfio_ap_devices =
>> +        QTAILQ_HEAD_INITIALIZER(vfio_ap_devices);
>> +
>> +static void vfio_put_device(VFIOAPDevice *apdev)
>> +{
>> +    g_free(apdev->vdev.name);
>> +    vfio_put_base_device(&apdev->vdev);
>> +}
>> +
>> +static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp)
>> +{
>> +    char *tmp, group_path[PATH_MAX];
>> +    ssize_t len;
>> +    int groupid;
>> +
>> +    tmp = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev);
>> +    len = readlink(tmp, group_path, sizeof(group_path));
>> +    g_free(tmp);
>> +
>> +    if (len <= 0 || len >= sizeof(group_path)) {
>> +        error_setg(errp, "%s: no iommu_group found for %s",
>> +                   VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev);
>> +        return NULL;
>> +    }
>> +
>> +    group_path[len] = 0;
>> +
>> +    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
>> +        error_setg(errp, "vfio: failed to read %s", group_path);
>> +        return NULL;
>> +    }
>> +
>> +    return vfio_get_group(groupid, &address_space_memory, errp);
>> +}
>> +
>> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
>> +{
>> +    VFIODevice *vbasedev;
>> +    VFIOGroup *vfio_group;
>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>> +    char *mdevid;
>> +    Error *local_err = NULL;
>> +    int ret;
>> +
>> +    if (!s390_has_feat(S390_FEAT_AP)) {
>> +        error_setg(&local_err, "AP support not enabled");
>> +        goto out_err;
>> +    }
>> +
>> +    ret = kvm_s390_set_interpret_ap(1);
>
> If we have several devices, this is called once per device.
> However it is a general switch for the VM.
> It looks strange to have this inside the realize callback
I've been so focused on the idea that a guest's AP matrix is configured 
via this
device that I hadn't considered the possibility that there is nothing 
precluding
multiple vfio-ap devices being defined for a guest. I agree, this is a VM
switch, but the question is, where does it belong? Whether AP 
instructions are
interpreted or not is dependent upon what type of VFIO driver is being 
used which
is why I put this here. Would it make sense to model this as a -machine 
property
and treat it like we do with key wrapping support?
>
>
>> +    if (ret) {
>> +        error_setg_errno(&local_err, errno,
>> +                         "error setting interpretive execution of AP 
>> instructions");
>> +        goto out_err;
>> +    }
>> +
>> +    vfio_group = vfio_ap_get_group(vapdev, &local_err);
>> +    if (!vfio_group) {
>> +        goto out_group_err;
>> +    }
>> +
>> +    vapdev->vdev.ops = &vfio_ap_ops;
>> +    vapdev->vdev.type = VFIO_DEVICE_TYPE_AP;
>> +    mdevid = basename(vapdev->vdev.sysfsdev);
>> +    vapdev->vdev.name = g_strdup_printf("%s", mdevid);
>> +    vapdev->vdev.dev = dev;
>> +    QLIST_FOREACH(vbasedev, &vfio_group->device_list, next) {
>> +        if (strcmp(vbasedev->name, vapdev->vdev.name) == 0) {
>> +            error_setg(&local_err,
>> +                       "%s: AP device %s has already been realized",
>> +                       VFIO_AP_DEVICE_TYPE, vapdev->vdev.name);
>> +            goto out_device_err;
>> +        }
>> +    }
>> +
>> +    ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, 
>> &local_err);
>> +    if (ret) {
>> +        goto out_device_err;
>> +    }
>> +
>> +    QTAILQ_INSERT_TAIL(&vfio_ap_devices, vapdev, sibling);
>> +
>> +    return;
>> +
>> +
>> +out_device_err:
>> +    vfio_put_group(vfio_group);
>> +out_group_err:
>> +    kvm_s390_set_interpret_ap(0);
>> +out_err:
>> +    error_propagate(errp, local_err);
>> +}
>> +
>> +static void vfio_ap_unrealize(DeviceState *dev, Error **errp)
>> +{
>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>> +    VFIOGroup *group = vapdev->vdev.group;
>> +
>> +    vfio_put_device(vapdev);
>> +    vfio_put_group(group);
>> +    kvm_s390_set_interpret_ap(0);
>> +}
>> +
>> +static Property vfio_ap_properties[] = {
>> +    DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static const VMStateDescription vfio_ap_vmstate = {
>> +    .name = VFIO_AP_DEVICE_TYPE,
>> +    .unmigratable = 1,
>> +};
>> +
>> +static void vfio_ap_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +    dc->props = vfio_ap_properties;
>> +    dc->vmsd = &vfio_ap_vmstate;
>> +    dc->desc = "VFIO-based AP device assignment";
>> +    dc->realize = vfio_ap_realize;
>> +    dc->unrealize = vfio_ap_unrealize;
>
>
> Shouldn't we make the VFIO-AP devices unpluggable attribute
> in the case the AP device are pluggable?
I'm going to make the AP device unpluggable.
>
>
>> +}
>> +
>> +static const TypeInfo vfio_ap_info = {
>> +    .name = VFIO_AP_DEVICE_TYPE,
>> +    .parent = AP_DEVICE_TYPE,
>> +    .instance_size = sizeof(VFIOAPDevice),
>> +    .class_init = vfio_ap_class_init,
>> +};
>> +
>> +static void register_vfio_ap_type(void)
>> +{
>> +    type_register_static(&vfio_ap_info);
>> +}
>> +
>> +type_init(register_vfio_ap_type)
>> diff --git a/include/hw/vfio/vfio-common.h 
>> b/include/hw/vfio/vfio-common.h
>> index f3a2ac9..f1f22d9 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -46,6 +46,7 @@ enum {
>>       VFIO_DEVICE_TYPE_PCI = 0,
>>       VFIO_DEVICE_TYPE_PLATFORM = 1,
>>       VFIO_DEVICE_TYPE_CCW = 2,
>> +    VFIO_DEVICE_TYPE_AP = 3,
>>   };
>>
>>   typedef struct VFIOMmap {
>
>
Tony Krowiak March 16, 2018, 3:29 p.m. UTC | #4
On 03/16/2018 09:22 AM, Halil Pasic wrote:
>
> On 03/16/2018 11:42 AM, Pierre Morel wrote:
>> On 16/03/2018 00:24, Tony Krowiak wrote:
>>> Introduces a VFIO based AP device. The device is defined via
>>> the QEMU command line by specifying:
>>>
>>>       -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>>>
>>> The mediated matrix device is created by the VFIO AP device
>>> driver by writing a UUID to a sysfs attribute file (see
>>> docs/vfio-ap.txt). The mediated matrix device will be named
>>> after the UUID. Symbolic links to the $uuid are created in
>>> many places, so the path to the mediated matrix device $uuid
>>> can be specified in any of the following ways:
>>>
>>> /sys/devices/vfio_ap/matrix/$uuid
>>> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
>>> /sys/bus/mdev/devices/$uuid
>>> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>>>
>>> When the vfio-ap device is realized, it acquires and opens the
>>> VFIO iommu group to which the mediated matrix device is
>>> bound. This causes a VFIO group notification event to be
>>> signaled. The vfio_ap device driver's group notification
>>> handler will get called at which time the device driver
>>> will configure the the AP devices to which the guest will
>>> be granted access.
>>>
>>> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>>> ---
> [..]
>>> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +    VFIODevice *vbasedev;
>>> +    VFIOGroup *vfio_group;
>>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>>> +    char *mdevid;
>>> +    Error *local_err = NULL;
>>> +    int ret;
>>> +
>>> +    if (!s390_has_feat(S390_FEAT_AP)) {
>>> +        error_setg(&local_err, "AP support not enabled");
>>> +        goto out_err;
>>> +    }
>>> +
>>> +    ret = kvm_s390_set_interpret_ap(1);
>> If we have several devices, this is called once per device.
> I don't think having several of these in a single vm makes
> any sense. Or does it? IMHO we should make sure there is at
> most one device taking care of the crypto pass-through.
My first thought was that only one vfio-ap device was necessary
to provide the AP matrix to a guest. Allowing additional vfio-ap
devices would definitely be superfluous, so I think it makes sense
to allow at most one vfio-ap device. In that case, setting AP
interpretive execution here makes sense.
>
>> However it is a general switch for the VM.
>> It looks strange to have this inside the realize callback
>>
> I kind of semi agree.
>
> The previous solution (having this transparent for QEMU) had
> benefits. Why did we move away from that again?
This was done to address the multitude of objections and opinions 
related to
how/when/where ECA_APIE is set. Pierre summed it up best with his 
comment "we
need to separate the CPU feature defining 'if AP instructions are 
available'
from the QEMU property defining 'How we provide the instructions'. I 
agree and
this is how I chose to implement that.
>
> Regards,
> Halil
Halil Pasic March 16, 2018, 3:36 p.m. UTC | #5
On 03/16/2018 04:29 PM, Tony Krowiak wrote:
>>> However it is a general switch for the VM.
>>> It looks strange to have this inside the realize callback
>>>
>> I kind of semi agree.
>>
>> The previous solution (having this transparent for QEMU) had
>> benefits. Why did we move away from that again?
> This was done to address the multitude of objections and opinions related to
> how/when/where ECA_APIE is set. Pierre summed it up best with his comment "we
> need to separate the CPU feature defining 'if AP instructions are available'
> from the QEMU property defining 'How we provide the instructions'. I agree and
> this is how I chose to implement that.

It's even more separated if the one is controlled via a
userspace facing interface (cpu models) and the other
is controlled via an in-kernel interface (e.g. like
done previously in vfio open AFAIR).

I can not accept this answer. I don't recall the multitude
and I think I've showed that the 'need to separate' argument
does not work.

Regards,
Halil
Tony Krowiak March 16, 2018, 3:53 p.m. UTC | #6
On 03/16/2018 11:36 AM, Halil Pasic wrote:
>
> On 03/16/2018 04:29 PM, Tony Krowiak wrote:
>>>> However it is a general switch for the VM.
>>>> It looks strange to have this inside the realize callback
>>>>
>>> I kind of semi agree.
>>>
>>> The previous solution (having this transparent for QEMU) had
>>> benefits. Why did we move away from that again?
>> This was done to address the multitude of objections and opinions related to
>> how/when/where ECA_APIE is set. Pierre summed it up best with his comment "we
>> need to separate the CPU feature defining 'if AP instructions are available'
>> from the QEMU property defining 'How we provide the instructions'. I agree and
>> this is how I chose to implement that.
> It's even more separated if the one is controlled via a
> userspace facing interface (cpu models) and the other
> is controlled via an in-kernel interface (e.g. like
> done previously in vfio open AFAIR).
There were objections to that.
>
> I can not accept this answer. I don't recall the multitude
> and I think I've showed that the 'need to separate' argument
> does not work.
I suppose it depends upon how you define multitude. I counted at
at least 11 - actually more - review comments related to the
CPU model feature and setting ECA_APIE. I suggest you go back
and look at the thread.

Where did you show that the 'need to separate' argument does
not work?
>
> Regards,
> Halil
Halil Pasic March 16, 2018, 4:26 p.m. UTC | #7
On 03/16/2018 04:53 PM, Tony Krowiak wrote:
> On 03/16/2018 11:36 AM, Halil Pasic wrote:
>>
>> On 03/16/2018 04:29 PM, Tony Krowiak wrote:
>>>>> However it is a general switch for the VM.
>>>>> It looks strange to have this inside the realize callback
>>>>>
>>>> I kind of semi agree.
>>>>
>>>> The previous solution (having this transparent for QEMU) had
>>>> benefits. Why did we move away from that again?
>>> This was done to address the multitude of objections and opinions related to
>>> how/when/where ECA_APIE is set. Pierre summed it up best with his comment "we
>>> need to separate the CPU feature defining 'if AP instructions are available'
>>> from the QEMU property defining 'How we provide the instructions'. I agree and
>>> this is how I chose to implement that.
>> It's even more separated if the one is controlled via a
>> userspace facing interface (cpu models) and the other
>> is controlled via an in-kernel interface (e.g. like
>> done previously in vfio open AFAIR).
> There were objections to that.

AFAIR we agreed that keeping that interface is OK, but I may be wrong.
You being more specific that 'there were objections'  and 'multitude
of objections and opinions' would have been appreciated. But never
mind.

AFAIU Pierre is nagging you about this in the corresponding kernel
thread. I will let Pierre drive this discussion.

>>
>> I can not accept this answer. I don't recall the multitude
>> and I think I've showed that the 'need to separate' argument
>> does not work.
> I suppose it depends upon how you define multitude. I counted at
> at least 11 - actually more - review comments related to the
> CPU model feature and setting ECA_APIE. I suggest you go back
> and look at the thread.
> 
> Where did you show that the 'need to separate' argument does
> not work?

You answered to that part with 'There were objections to that'.
As I've said before never mind.

Halil
Cornelia Huck March 27, 2018, 12:02 p.m. UTC | #8
On Fri, 16 Mar 2018 14:22:52 +0100
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> On 03/16/2018 11:42 AM, Pierre Morel wrote:
> > On 16/03/2018 00:24, Tony Krowiak wrote:  
> >> Introduces a VFIO based AP device. The device is defined via
> >> the QEMU command line by specifying:
> >>
> >>      -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
> >>
> >> The mediated matrix device is created by the VFIO AP device
> >> driver by writing a UUID to a sysfs attribute file (see
> >> docs/vfio-ap.txt). The mediated matrix device will be named
> >> after the UUID. Symbolic links to the $uuid are created in
> >> many places, so the path to the mediated matrix device $uuid
> >> can be specified in any of the following ways:
> >>
> >> /sys/devices/vfio_ap/matrix/$uuid
> >> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
> >> /sys/bus/mdev/devices/$uuid
> >> /sys/bus/mdev/drivers/vfio_mdev/$uuid
> >>
> >> When the vfio-ap device is realized, it acquires and opens the
> >> VFIO iommu group to which the mediated matrix device is
> >> bound. This causes a VFIO group notification event to be
> >> signaled. The vfio_ap device driver's group notification
> >> handler will get called at which time the device driver
> >> will configure the the AP devices to which the guest will
> >> be granted access.
> >>
> >> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
> >> ---  
> [..]
> >> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    VFIODevice *vbasedev;
> >> +    VFIOGroup *vfio_group;
> >> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
> >> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
> >> +    char *mdevid;
> >> +    Error *local_err = NULL;
> >> +    int ret;
> >> +
> >> +    if (!s390_has_feat(S390_FEAT_AP)) {
> >> +        error_setg(&local_err, "AP support not enabled");
> >> +        goto out_err;
> >> +    }
> >> +
> >> +    ret = kvm_s390_set_interpret_ap(1);  
> > 
> > If we have several devices, this is called once per device.  
> 
> I don't think having several of these in a single vm makes
> any sense. Or does it? IMHO we should make sure there is at
> most one device taking care of the crypto pass-through.

Yes, I think we should fence off adding a second device in the realize
function (probably by checking a global variable?)
Tony Krowiak April 2, 2018, 5:05 p.m. UTC | #9
On 03/27/2018 08:02 AM, Cornelia Huck wrote:
> On Fri, 16 Mar 2018 14:22:52 +0100
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>
>> On 03/16/2018 11:42 AM, Pierre Morel wrote:
>>> On 16/03/2018 00:24, Tony Krowiak wrote:
>>>> Introduces a VFIO based AP device. The device is defined via
>>>> the QEMU command line by specifying:
>>>>
>>>>       -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>>>>
>>>> The mediated matrix device is created by the VFIO AP device
>>>> driver by writing a UUID to a sysfs attribute file (see
>>>> docs/vfio-ap.txt). The mediated matrix device will be named
>>>> after the UUID. Symbolic links to the $uuid are created in
>>>> many places, so the path to the mediated matrix device $uuid
>>>> can be specified in any of the following ways:
>>>>
>>>> /sys/devices/vfio_ap/matrix/$uuid
>>>> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
>>>> /sys/bus/mdev/devices/$uuid
>>>> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>>>>
>>>> When the vfio-ap device is realized, it acquires and opens the
>>>> VFIO iommu group to which the mediated matrix device is
>>>> bound. This causes a VFIO group notification event to be
>>>> signaled. The vfio_ap device driver's group notification
>>>> handler will get called at which time the device driver
>>>> will configure the the AP devices to which the guest will
>>>> be granted access.
>>>>
>>>> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>>>> ---
>> [..]
>>>> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    VFIODevice *vbasedev;
>>>> +    VFIOGroup *vfio_group;
>>>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>>>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>>>> +    char *mdevid;
>>>> +    Error *local_err = NULL;
>>>> +    int ret;
>>>> +
>>>> +    if (!s390_has_feat(S390_FEAT_AP)) {
>>>> +        error_setg(&local_err, "AP support not enabled");
>>>> +        goto out_err;
>>>> +    }
>>>> +
>>>> +    ret = kvm_s390_set_interpret_ap(1);
>>> If we have several devices, this is called once per device.
>> I don't think having several of these in a single vm makes
>> any sense. Or does it? IMHO we should make sure there is at
>> most one device taking care of the crypto pass-through.
> Yes, I think we should fence off adding a second device in the realize
> function (probably by checking a global variable?)
I agree, will do.
>
Tony Krowiak April 3, 2018, 6:53 p.m. UTC | #10
On 04/02/2018 01:05 PM, Tony Krowiak wrote:
> On 03/27/2018 08:02 AM, Cornelia Huck wrote:
>> On Fri, 16 Mar 2018 14:22:52 +0100
>> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>>
>>> On 03/16/2018 11:42 AM, Pierre Morel wrote:
>>>> On 16/03/2018 00:24, Tony Krowiak wrote:
>>>>> Introduces a VFIO based AP device. The device is defined via
>>>>> the QEMU command line by specifying:
>>>>>
>>>>>       -device vfio-ap,sysfsdev=<path-to-mediated-matrix-device>
>>>>>
>>>>> The mediated matrix device is created by the VFIO AP device
>>>>> driver by writing a UUID to a sysfs attribute file (see
>>>>> docs/vfio-ap.txt). The mediated matrix device will be named
>>>>> after the UUID. Symbolic links to the $uuid are created in
>>>>> many places, so the path to the mediated matrix device $uuid
>>>>> can be specified in any of the following ways:
>>>>>
>>>>> /sys/devices/vfio_ap/matrix/$uuid
>>>>> /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid 
>>>>>
>>>>> /sys/bus/mdev/devices/$uuid
>>>>> /sys/bus/mdev/drivers/vfio_mdev/$uuid
>>>>>
>>>>> When the vfio-ap device is realized, it acquires and opens the
>>>>> VFIO iommu group to which the mediated matrix device is
>>>>> bound. This causes a VFIO group notification event to be
>>>>> signaled. The vfio_ap device driver's group notification
>>>>> handler will get called at which time the device driver
>>>>> will configure the the AP devices to which the guest will
>>>>> be granted access.
>>>>>
>>>>> Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
>>>>> ---
>>> [..]
>>>>> +static void vfio_ap_realize(DeviceState *dev, Error **errp)
>>>>> +{
>>>>> +    VFIODevice *vbasedev;
>>>>> +    VFIOGroup *vfio_group;
>>>>> +    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
>>>>> +    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
>>>>> +    char *mdevid;
>>>>> +    Error *local_err = NULL;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!s390_has_feat(S390_FEAT_AP)) {
>>>>> +        error_setg(&local_err, "AP support not enabled");
>>>>> +        goto out_err;
>>>>> +    }
>>>>> +
>>>>> +    ret = kvm_s390_set_interpret_ap(1);
>>>> If we have several devices, this is called once per device.
>>> I don't think having several of these in a single vm makes
>>> any sense. Or does it? IMHO we should make sure there is at
>>> most one device taking care of the crypto pass-through.
>> Yes, I think we should fence off adding a second device in the realize
>> function (probably by checking a global variable?)
> I agree, will do.
In light of comments Connie made in Message ID: 
<20180403113619.54ff1e18.cohuck@redhat.com>, this
raises the question: What about hotplug? If we allow only one vfio_ap 
device for a guest, then
there will be no hot plugging of vfio_ap devices except for the case 
where no vfio_ap device is
defined for the guest at startup. The question then becomes how do we 
configure (hot plug?)
additional queues for guest usage? I will have to give this some 
additional thought before
committing to a solution here.
>
>>
>
diff mbox

Patch

diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index 2f4bfe7..0b784b6 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -9,3 +9,4 @@  CONFIG_S390_FLIC=y
 CONFIG_S390_FLIC_KVM=$(CONFIG_KVM)
 CONFIG_VFIO_CCW=$(CONFIG_LINUX)
 CONFIG_WDT_DIAG288=y
+CONFIG_VFIO_AP=$(CONFIG_LINUX)
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index c3ab909..7300860 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -6,4 +6,5 @@  obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
 obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
 obj-$(CONFIG_SOFTMMU) += spapr.o
+obj-$(CONFIG_VFIO_AP) += ap.o
 endif
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
new file mode 100644
index 0000000..b397bb1
--- /dev/null
+++ b/hw/vfio/ap.c
@@ -0,0 +1,184 @@ 
+/*
+ * VFIO based AP matrix device assignment
+ *
+ * Copyright 2018 IBM Corp.
+ * Author(s): Tony Krowiak <akrowiak@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/ap-device.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+#include "cpu.h"
+#include "kvm_s390x.h"
+
+#define VFIO_AP_DEVICE_TYPE      "vfio-ap"
+
+typedef struct VFIOAPDevice {
+    APDevice apdev;
+    VFIODevice vdev;
+    QTAILQ_ENTRY(VFIOAPDevice) sibling;
+} VFIOAPDevice;
+
+static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
+{
+    vdev->needs_reset = false;
+}
+
+/*
+ * We don't need vfio_hot_reset_multi and vfio_eoi operations for
+ * vfio-ap-matrix device now.
+ */
+struct VFIODeviceOps vfio_ap_ops = {
+    .vfio_compute_needs_reset = vfio_ap_compute_needs_reset,
+};
+
+static QTAILQ_HEAD(, VFIOAPDevice) vfio_ap_devices =
+        QTAILQ_HEAD_INITIALIZER(vfio_ap_devices);
+
+static void vfio_put_device(VFIOAPDevice *apdev)
+{
+    g_free(apdev->vdev.name);
+    vfio_put_base_device(&apdev->vdev);
+}
+
+static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp)
+{
+    char *tmp, group_path[PATH_MAX];
+    ssize_t len;
+    int groupid;
+
+    tmp = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev);
+    len = readlink(tmp, group_path, sizeof(group_path));
+    g_free(tmp);
+
+    if (len <= 0 || len >= sizeof(group_path)) {
+        error_setg(errp, "%s: no iommu_group found for %s",
+                   VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev);
+        return NULL;
+    }
+
+    group_path[len] = 0;
+
+    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
+        error_setg(errp, "vfio: failed to read %s", group_path);
+        return NULL;
+    }
+
+    return vfio_get_group(groupid, &address_space_memory, errp);
+}
+
+static void vfio_ap_realize(DeviceState *dev, Error **errp)
+{
+    VFIODevice *vbasedev;
+    VFIOGroup *vfio_group;
+    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
+    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
+    char *mdevid;
+    Error *local_err = NULL;
+    int ret;
+
+    if (!s390_has_feat(S390_FEAT_AP)) {
+        error_setg(&local_err, "AP support not enabled");
+        goto out_err;
+    }
+
+    ret = kvm_s390_set_interpret_ap(1);
+    if (ret) {
+        error_setg_errno(&local_err, errno,
+                         "error setting interpretive execution of AP instructions");
+        goto out_err;
+    }
+
+    vfio_group = vfio_ap_get_group(vapdev, &local_err);
+    if (!vfio_group) {
+        goto out_group_err;
+    }
+
+    vapdev->vdev.ops = &vfio_ap_ops;
+    vapdev->vdev.type = VFIO_DEVICE_TYPE_AP;
+    mdevid = basename(vapdev->vdev.sysfsdev);
+    vapdev->vdev.name = g_strdup_printf("%s", mdevid);
+    vapdev->vdev.dev = dev;
+    QLIST_FOREACH(vbasedev, &vfio_group->device_list, next) {
+        if (strcmp(vbasedev->name, vapdev->vdev.name) == 0) {
+            error_setg(&local_err,
+                       "%s: AP device %s has already been realized",
+                       VFIO_AP_DEVICE_TYPE, vapdev->vdev.name);
+            goto out_device_err;
+        }
+    }
+
+    ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err);
+    if (ret) {
+        goto out_device_err;
+    }
+
+    QTAILQ_INSERT_TAIL(&vfio_ap_devices, vapdev, sibling);
+
+    return;
+
+
+out_device_err:
+    vfio_put_group(vfio_group);
+out_group_err:
+    kvm_s390_set_interpret_ap(0);
+out_err:
+    error_propagate(errp, local_err);
+}
+
+static void vfio_ap_unrealize(DeviceState *dev, Error **errp)
+{
+    APDevice *apdev = DO_UPCAST(APDevice, parent_obj, dev);
+    VFIOAPDevice *vapdev = DO_UPCAST(VFIOAPDevice, apdev, apdev);
+    VFIOGroup *group = vapdev->vdev.group;
+
+    vfio_put_device(vapdev);
+    vfio_put_group(group);
+    kvm_s390_set_interpret_ap(0);
+}
+
+static Property vfio_ap_properties[] = {
+    DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription vfio_ap_vmstate = {
+    .name = VFIO_AP_DEVICE_TYPE,
+    .unmigratable = 1,
+};
+
+static void vfio_ap_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->props = vfio_ap_properties;
+    dc->vmsd = &vfio_ap_vmstate;
+    dc->desc = "VFIO-based AP device assignment";
+    dc->realize = vfio_ap_realize;
+    dc->unrealize = vfio_ap_unrealize;
+}
+
+static const TypeInfo vfio_ap_info = {
+    .name = VFIO_AP_DEVICE_TYPE,
+    .parent = AP_DEVICE_TYPE,
+    .instance_size = sizeof(VFIOAPDevice),
+    .class_init = vfio_ap_class_init,
+};
+
+static void register_vfio_ap_type(void)
+{
+    type_register_static(&vfio_ap_info);
+}
+
+type_init(register_vfio_ap_type)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f3a2ac9..f1f22d9 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -46,6 +46,7 @@  enum {
     VFIO_DEVICE_TYPE_PCI = 0,
     VFIO_DEVICE_TYPE_PLATFORM = 1,
     VFIO_DEVICE_TYPE_CCW = 2,
+    VFIO_DEVICE_TYPE_AP = 3,
 };
 
 typedef struct VFIOMmap {