Message ID | 1457403029-21322-2-git-send-email-david@gibson.dropbear.id.au (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, 8 Mar 2016 13:10:23 +1100 David Gibson <david@gibson.dropbear.id.au> wrote: > At present the code handling IBM's Enhanced Error Handling (EEH) interface > on VFIO devices operates by bypassing the usual VFIO logic with > vfio_container_ioctl(). That's a poorly designed interface with unclear > semantics about exactly what can be operated on. > > In particular it operates on a single vfio container internally (hence the > name), but takes an address space and group id, from which it deduces the > container in a rather roundabout way. groupids are something that code > outside vfio shouldn't even be aware of. > > This patch creates new interfaces for EEH operations. Internally we > have vfio_eeh_container_op() which takes a VFIOContainer object > directly. For external use we have vfio_eeh_as_ok() which determines > if an AddressSpace is usable for EEH (at present this means it has a > single container with exactly one group attached), and vfio_eeh_as_op() > which will perform an operation on an AddressSpace in the unambiguous case, > and otherwise returns an error. > > This interface still isn't great, but it's enough of an improvement to > allow a number of cleanups in other places. > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au> > Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru> > --- I'll let you push this through your tree: Acked-by: Alex Williamson <alex.williamson@redhat.com> > hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ > include/hw/vfio/vfio.h | 2 ++ > 2 files changed, 97 insertions(+) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 96ccb79..0636bb1 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -1137,3 +1137,98 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > > return vfio_container_do_ioctl(as, groupid, req, param); > } > + > +/* > + * Interfaces for IBM EEH (Enhanced Error Handling) > + */ > +static bool vfio_eeh_container_ok(VFIOContainer *container) > +{ > + /* > + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO > + * implementation is broken if there are multiple groups in a > + * container. The hardware works in units of Partitionable > + * Endpoints (== IOMMU groups) and the EEH operations naively > + * iterate across all groups in the container, without any logic > + * to make sure the groups have their state synchronized. For > + * certain operations (ENABLE) that might be ok, until an error > + * occurs, but for others (GET_STATE) it's clearly broken. > + */ > + > + /* > + * XXX Once fixed kernels exist, test for them here > + */ > + > + if (QLIST_EMPTY(&container->group_list)) { > + return false; > + } > + > + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { > + return false; > + } > + > + return true; > +} > + > +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) > +{ > + struct vfio_eeh_pe_op pe_op = { > + .argsz = sizeof(pe_op), > + .op = op, > + }; > + int ret; > + > + if (!vfio_eeh_container_ok(container)) { > + error_report("vfio/eeh: EEH_PE_OP 0x%x: " > + "kernel requires a container with exactly one group", op); > + return -EPERM; > + } > + > + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); > + if (ret < 0) { > + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); > + return -errno; > + } > + > + return 0; > +} > + > +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) > +{ > + VFIOAddressSpace *space = vfio_get_address_space(as); > + VFIOContainer *container = NULL; > + > + if (QLIST_EMPTY(&space->containers)) { > + /* No containers to act on */ > + goto out; > + } > + > + container = QLIST_FIRST(&space->containers); > + > + if (QLIST_NEXT(container, next)) { > + /* We don't yet have logic to synchronize EEH state across > + * multiple containers */ > + container = NULL; > + goto out; > + } > + > +out: > + vfio_put_address_space(space); > + return container; > +} > + > +bool vfio_eeh_as_ok(AddressSpace *as) > +{ > + VFIOContainer *container = vfio_eeh_as_container(as); > + > + return (container != NULL) && vfio_eeh_container_ok(container); > +} > + > +int vfio_eeh_as_op(AddressSpace *as, uint32_t op) > +{ > + VFIOContainer *container = vfio_eeh_as_container(as); > + > + if (!container) { > + return -ENODEV; > + } > + return vfio_eeh_container_op(container, op); > +} > diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h > index 0b26cd8..fd3933b 100644 > --- a/include/hw/vfio/vfio.h > +++ b/include/hw/vfio/vfio.h > @@ -5,5 +5,7 @@ > > extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > int req, void *param); > +bool vfio_eeh_as_ok(AddressSpace *as); > +int vfio_eeh_as_op(AddressSpace *as, uint32_t op); > > #endif
On Tue, Mar 08, 2016 at 11:33:45AM -0700, Alex Williamson wrote: > On Tue, 8 Mar 2016 13:10:23 +1100 > David Gibson <david@gibson.dropbear.id.au> wrote: > > > At present the code handling IBM's Enhanced Error Handling (EEH) interface > > on VFIO devices operates by bypassing the usual VFIO logic with > > vfio_container_ioctl(). That's a poorly designed interface with unclear > > semantics about exactly what can be operated on. > > > > In particular it operates on a single vfio container internally (hence the > > name), but takes an address space and group id, from which it deduces the > > container in a rather roundabout way. groupids are something that code > > outside vfio shouldn't even be aware of. > > > > This patch creates new interfaces for EEH operations. Internally we > > have vfio_eeh_container_op() which takes a VFIOContainer object > > directly. For external use we have vfio_eeh_as_ok() which determines > > if an AddressSpace is usable for EEH (at present this means it has a > > single container with exactly one group attached), and vfio_eeh_as_op() > > which will perform an operation on an AddressSpace in the unambiguous case, > > and otherwise returns an error. > > > > This interface still isn't great, but it's enough of an improvement to > > allow a number of cleanups in other places. > > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au> > > Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru> > > --- > > I'll let you push this through your tree: > > Acked-by: Alex Williamson <alex.williamson@redhat.com> Thanks. Any guess at when your vGPU series will be pushed? Mine will conflict until that is merged upstream. > > > hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > include/hw/vfio/vfio.h | 2 ++ > > 2 files changed, 97 insertions(+) > > > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > > index 96ccb79..0636bb1 100644 > > --- a/hw/vfio/common.c > > +++ b/hw/vfio/common.c > > @@ -1137,3 +1137,98 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > > > > return vfio_container_do_ioctl(as, groupid, req, param); > > } > > + > > +/* > > + * Interfaces for IBM EEH (Enhanced Error Handling) > > + */ > > +static bool vfio_eeh_container_ok(VFIOContainer *container) > > +{ > > + /* > > + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO > > + * implementation is broken if there are multiple groups in a > > + * container. The hardware works in units of Partitionable > > + * Endpoints (== IOMMU groups) and the EEH operations naively > > + * iterate across all groups in the container, without any logic > > + * to make sure the groups have their state synchronized. For > > + * certain operations (ENABLE) that might be ok, until an error > > + * occurs, but for others (GET_STATE) it's clearly broken. > > + */ > > + > > + /* > > + * XXX Once fixed kernels exist, test for them here > > + */ > > + > > + if (QLIST_EMPTY(&container->group_list)) { > > + return false; > > + } > > + > > + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) > > +{ > > + struct vfio_eeh_pe_op pe_op = { > > + .argsz = sizeof(pe_op), > > + .op = op, > > + }; > > + int ret; > > + > > + if (!vfio_eeh_container_ok(container)) { > > + error_report("vfio/eeh: EEH_PE_OP 0x%x: " > > + "kernel requires a container with exactly one group", op); > > + return -EPERM; > > + } > > + > > + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); > > + if (ret < 0) { > > + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); > > + return -errno; > > + } > > + > > + return 0; > > +} > > + > > +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) > > +{ > > + VFIOAddressSpace *space = vfio_get_address_space(as); > > + VFIOContainer *container = NULL; > > + > > + if (QLIST_EMPTY(&space->containers)) { > > + /* No containers to act on */ > > + goto out; > > + } > > + > > + container = QLIST_FIRST(&space->containers); > > + > > + if (QLIST_NEXT(container, next)) { > > + /* We don't yet have logic to synchronize EEH state across > > + * multiple containers */ > > + container = NULL; > > + goto out; > > + } > > + > > +out: > > + vfio_put_address_space(space); > > + return container; > > +} > > + > > +bool vfio_eeh_as_ok(AddressSpace *as) > > +{ > > + VFIOContainer *container = vfio_eeh_as_container(as); > > + > > + return (container != NULL) && vfio_eeh_container_ok(container); > > +} > > + > > +int vfio_eeh_as_op(AddressSpace *as, uint32_t op) > > +{ > > + VFIOContainer *container = vfio_eeh_as_container(as); > > + > > + if (!container) { > > + return -ENODEV; > > + } > > + return vfio_eeh_container_op(container, op); > > +} > > diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h > > index 0b26cd8..fd3933b 100644 > > --- a/include/hw/vfio/vfio.h > > +++ b/include/hw/vfio/vfio.h > > @@ -5,5 +5,7 @@ > > > > extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > > int req, void *param); > > +bool vfio_eeh_as_ok(AddressSpace *as); > > +int vfio_eeh_as_op(AddressSpace *as, uint32_t op); > > > > #endif >
On Wed, 9 Mar 2016 11:56:57 +1100 David Gibson <david@gibson.dropbear.id.au> wrote: > On Tue, Mar 08, 2016 at 11:33:45AM -0700, Alex Williamson wrote: > > On Tue, 8 Mar 2016 13:10:23 +1100 > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > At present the code handling IBM's Enhanced Error Handling (EEH) interface > > > on VFIO devices operates by bypassing the usual VFIO logic with > > > vfio_container_ioctl(). That's a poorly designed interface with unclear > > > semantics about exactly what can be operated on. > > > > > > In particular it operates on a single vfio container internally (hence the > > > name), but takes an address space and group id, from which it deduces the > > > container in a rather roundabout way. groupids are something that code > > > outside vfio shouldn't even be aware of. > > > > > > This patch creates new interfaces for EEH operations. Internally we > > > have vfio_eeh_container_op() which takes a VFIOContainer object > > > directly. For external use we have vfio_eeh_as_ok() which determines > > > if an AddressSpace is usable for EEH (at present this means it has a > > > single container with exactly one group attached), and vfio_eeh_as_op() > > > which will perform an operation on an AddressSpace in the unambiguous case, > > > and otherwise returns an error. > > > > > > This interface still isn't great, but it's enough of an improvement to > > > allow a number of cleanups in other places. > > > > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au> > > > Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru> > > > --- > > > > I'll let you push this through your tree: > > > > Acked-by: Alex Williamson <alex.williamson@redhat.com> > > Thanks. Any guess at when your vGPU series will be pushed? Mine will > conflict until that is merged upstream. It's been out long enough, I'll send a pull request tomorrow. Thanks, Alex
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 96ccb79..0636bb1 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1137,3 +1137,98 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, return vfio_container_do_ioctl(as, groupid, req, param); } + +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. + */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return 0; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* We don't yet have logic to synchronize EEH state across + * multiple containers */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index 0b26cd8..fd3933b 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -5,5 +5,7 @@ extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, int req, void *param); +bool vfio_eeh_as_ok(AddressSpace *as); +int vfio_eeh_as_op(AddressSpace *as, uint32_t op); #endif