Message ID | 20230830103754.36461-17-zhenzhong.duan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: Adopt iommufd | expand |
On 8/30/23 12:37, Zhenzhong Duan wrote: > From: Eric Auger <eric.auger@redhat.com> > > Introduce an iommufd object which allows the interaction > with the host /dev/iommu device. > > The /dev/iommu can have been already pre-opened outside of qemu, > in which case the fd can be passed directly along with the > iommufd object: > > This allows the iommufd object to be shared accross several > subsystems (VFIO, VDPA, ...). For example, libvirt would open > the /dev/iommu once. > > If no fd is passed along with the iommufd object, the /dev/iommu > is opened by the qemu code. > > The CONFIG_IOMMUFD option must be set to compile this new object. > > Suggested-by: Alex Williamson <alex.williamson@redhat.com> > Signed-off-by: Eric Auger <eric.auger@redhat.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > MAINTAINERS | 7 + > backends/Kconfig | 4 + > backends/iommufd.c | 291 +++++++++++++++++++++++++++++++++++++++ > backends/meson.build | 3 + > backends/trace-events | 13 ++ > include/sysemu/iommufd.h | 49 +++++++ > qapi/qom.json | 18 ++- > qemu-options.hx | 13 ++ > 8 files changed, 397 insertions(+), 1 deletion(-) > create mode 100644 backends/iommufd.c > create mode 100644 include/sysemu/iommufd.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index 6111b6b4d9..04663fbb6f 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -2079,6 +2079,13 @@ F: hw/vfio/ap.c > F: docs/system/s390x/vfio-ap.rst > L: qemu-s390x@nongnu.org > > +iommufd > +M: Yi Liu <yi.l.liu@intel.com> > +M: Eric Auger <eric.auger@redhat.com> > +S: Supported > +F: backends/iommufd.c > +F: include/sysemu/iommufd.h > + > vhost > M: Michael S. Tsirkin <mst@redhat.com> > S: Supported > diff --git a/backends/Kconfig b/backends/Kconfig > index f35abc1609..2cb23f62fa 100644 > --- a/backends/Kconfig > +++ b/backends/Kconfig > @@ -1 +1,5 @@ > source tpm/Kconfig > + > +config IOMMUFD > + bool > + depends on VFIO > diff --git a/backends/iommufd.c b/backends/iommufd.c > new file mode 100644 > index 0000000000..07ea434424 > --- /dev/null > +++ b/backends/iommufd.c > @@ -0,0 +1,291 @@ > +/* > + * iommufd container backend > + * > + * Copyright (C) 2023 Intel Corporation. > + * Copyright Red Hat, Inc. 2023 > + * > + * Authors: Yi Liu <yi.l.liu@intel.com> > + * Eric Auger <eric.auger@redhat.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "qemu/osdep.h" > +#include "sysemu/iommufd.h" > +#include "qapi/error.h" > +#include "qapi/qmp/qerror.h" > +#include "qemu/module.h" > +#include "qom/object_interfaces.h" > +#include "qemu/error-report.h" > +#include "monitor/monitor.h" > +#include "trace.h" > +#include <sys/ioctl.h> > +#include <linux/iommufd.h> > + > +static void iommufd_backend_init(Object *obj) > +{ > + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); > + > + be->fd = -1; > + be->users = 0; > + be->owned = true; > + qemu_mutex_init(&be->lock); > +} > + > +static void iommufd_backend_finalize(Object *obj) > +{ > + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); > + > + if (be->owned) { > + close(be->fd); > + be->fd = -1; > + } > +} > + > +static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) > +{ > + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); > + int fd = -1; > + > + fd = monitor_fd_param(monitor_cur(), str, errp); > + if (fd == -1) { > + error_prepend(errp, "Could not parse remote object fd %s:", str); > + return; > + } > + qemu_mutex_lock(&be->lock); > + be->fd = fd; > + be->owned = false; > + qemu_mutex_unlock(&be->lock); > + trace_iommu_backend_set_fd(be->fd); > +} > + > +static void iommufd_backend_class_init(ObjectClass *oc, void *data) > +{ > + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); > +} > + > +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) > +{ > + int fd, ret = 0; > + > + qemu_mutex_lock(&be->lock); > + if (be->users == UINT32_MAX) { > + error_setg(errp, "too many connections"); > + ret = -E2BIG; > + goto out; > + } > + if (be->owned && !be->users) { > + fd = qemu_open_old("/dev/iommu", O_RDWR); > + if (fd < 0) { > + error_setg_errno(errp, errno, "/dev/iommu opening failed"); > + ret = fd; > + goto out; > + } > + be->fd = fd; > + } > + be->users++; > +out: > + trace_iommufd_backend_connect(be->fd, be->owned, > + be->users, ret); > + qemu_mutex_unlock(&be->lock); > + return ret; > +} > + > +void iommufd_backend_disconnect(IOMMUFDBackend *be) > +{ > + qemu_mutex_lock(&be->lock); > + if (!be->users) { > + goto out; > + } > + be->users--; > + if (!be->users && be->owned) { > + close(be->fd); > + be->fd = -1; > + } > +out: > + trace_iommufd_backend_disconnect(be->fd, be->users); > + qemu_mutex_unlock(&be->lock); > +} > + > +static int iommufd_backend_alloc_ioas(int fd, uint32_t *ioas) > +{ > + int ret; > + struct iommu_ioas_alloc alloc_data = { > + .size = sizeof(alloc_data), > + .flags = 0, > + }; > + > + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); > + if (ret) { > + error_report("Failed to allocate ioas %m"); > + } > + > + *ioas = alloc_data.out_ioas_id; > + trace_iommufd_backend_alloc_ioas(fd, *ioas, ret); > + > + return ret; > +} > + > +void iommufd_backend_free_id(int fd, uint32_t id) > +{ > + int ret; > + struct iommu_destroy des = { > + .size = sizeof(des), > + .id = id, > + }; > + > + ret = ioctl(fd, IOMMU_DESTROY, &des); > + trace_iommufd_backend_free_id(fd, id, ret); > + if (ret) { > + error_report("Failed to free id: %u %m", id); > + } > +} > + > +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id) > +{ > + int ret; > + > + ret = iommufd_backend_alloc_ioas(be->fd, ioas_id); > + trace_iommufd_backend_get_ioas(be->fd, *ioas_id, ret); > + return ret; > +} > + > +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas) > +{ > + trace_iommufd_backend_put_ioas(be->fd, ioas); > + iommufd_backend_free_id(be->fd, ioas); > +} > + > +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas, > + hwaddr iova, ram_addr_t size) > +{ > + int ret; > + struct iommu_ioas_unmap unmap = { > + .size = sizeof(unmap), > + .ioas_id = ioas, > + .iova = iova, > + .length = size, > + }; > + > + ret = ioctl(be->fd, IOMMU_IOAS_UNMAP, &unmap); > + trace_iommufd_backend_unmap_dma(be->fd, ioas, iova, size, ret); > + if (ret && errno == ENOENT) { > + ret = 0; > + } > + if (ret) { > + error_report("IOMMU_IOAS_UNMAP failed: %s", strerror(errno)); > + } > + return !ret ? 0 : -errno; > +} > + > +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas, hwaddr iova, > + ram_addr_t size, void *vaddr, bool readonly) > +{ > + int ret; > + struct iommu_ioas_map map = { > + .size = sizeof(map), > + .flags = IOMMU_IOAS_MAP_READABLE | > + IOMMU_IOAS_MAP_FIXED_IOVA, > + .ioas_id = ioas, > + .__reserved = 0, > + .user_va = (int64_t)vaddr, This needs an extra cast (uintptr_t) Thanks, C. > + .iova = iova, > + .length = size, > + }; > + > + if (!readonly) { > + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; > + } > + > + ret = ioctl(be->fd, IOMMU_IOAS_MAP, &map); > + trace_iommufd_backend_map_dma(be->fd, ioas, iova, size, > + vaddr, readonly, ret); > + if (ret) { > + error_report("IOMMU_IOAS_MAP failed: %s", strerror(errno)); > + } > + return !ret ? 0 : -errno; > +} > + > +int iommufd_backend_copy_dma(IOMMUFDBackend *be, uint32_t src_ioas, > + uint32_t dst_ioas, hwaddr iova, > + ram_addr_t size, bool readonly) > +{ > + int ret; > + struct iommu_ioas_copy copy = { > + .size = sizeof(copy), > + .flags = IOMMU_IOAS_MAP_READABLE | > + IOMMU_IOAS_MAP_FIXED_IOVA, > + .dst_ioas_id = dst_ioas, > + .src_ioas_id = src_ioas, > + .length = size, > + .dst_iova = iova, > + .src_iova = iova, > + }; > + > + if (!readonly) { > + copy.flags |= IOMMU_IOAS_MAP_WRITEABLE; > + } > + > + ret = ioctl(be->fd, IOMMU_IOAS_COPY, ©); > + trace_iommufd_backend_copy_dma(be->fd, src_ioas, dst_ioas, > + iova, size, readonly, ret); > + if (ret) { > + error_report("IOMMU_IOAS_COPY failed: %s", strerror(errno)); > + } > + return !ret ? 0 : -errno; > +} > + > +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, > + uint32_t pt_id, uint32_t *out_hwpt) > +{ > + int ret; > + struct iommu_hwpt_alloc alloc_hwpt = { > + .size = sizeof(struct iommu_hwpt_alloc), > + .flags = 0, > + .dev_id = dev_id, > + .pt_id = pt_id, > + .__reserved = 0, > + }; > + > + ret = ioctl(iommufd, IOMMU_HWPT_ALLOC, &alloc_hwpt); > + trace_iommufd_backend_alloc_hwpt(iommufd, dev_id, pt_id, ret); > + > + if (ret) { > + error_report("IOMMU_HWPT_ALLOC failed: %s", strerror(errno)); > + } else { > + *out_hwpt = alloc_hwpt.out_hwpt_id; > + } > + return !ret ? 0 : -errno; > +} > + > +static const TypeInfo iommufd_backend_info = { > + .name = TYPE_IOMMUFD_BACKEND, > + .parent = TYPE_OBJECT, > + .instance_size = sizeof(IOMMUFDBackend), > + .instance_init = iommufd_backend_init, > + .instance_finalize = iommufd_backend_finalize, > + .class_size = sizeof(IOMMUFDBackendClass), > + .class_init = iommufd_backend_class_init, > + .interfaces = (InterfaceInfo[]) { > + { TYPE_USER_CREATABLE }, > + { } > + } > +}; > + > +static void register_types(void) > +{ > + type_register_static(&iommufd_backend_info); > +} > + > +type_init(register_types); > diff --git a/backends/meson.build b/backends/meson.build > index 914c7c4afb..29dc147c8e 100644 > --- a/backends/meson.build > +++ b/backends/meson.build > @@ -20,6 +20,9 @@ if have_vhost_user > system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) > endif > system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) > +if have_iommufd > + system_ss.add(files('iommufd.c')) > +endif > if have_vhost_user_crypto > system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) > endif > diff --git a/backends/trace-events b/backends/trace-events > index 652eb76a57..093e3eb1da 100644 > --- a/backends/trace-events > +++ b/backends/trace-events > @@ -5,3 +5,16 @@ dbus_vmstate_pre_save(void) > dbus_vmstate_post_load(int version_id) "version_id: %d" > dbus_vmstate_loading(const char *id) "id: %s" > dbus_vmstate_saving(const char *id) "id: %s" > + > +# iommufd.c > +iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" > +iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" > +iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" > +iommufd_backend_get_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" > +iommufd_backend_put_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" > +iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" > +iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" > +iommufd_backend_copy_dma(int iommufd, uint32_t src_ioas, uint32_t dst_ioas, uint64_t iova, uint64_t size, bool readonly, int ret) " iommufd=%d src_ioas=%d dst_ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" readonly=%d (%d)" > +iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" > +iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" > +iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u (%d)" > diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h > new file mode 100644 > index 0000000000..f3bd212170 > --- /dev/null > +++ b/include/sysemu/iommufd.h > @@ -0,0 +1,49 @@ > +#ifndef SYSEMU_IOMMUFD_H > +#define SYSEMU_IOMMUFD_H > + > +#include "qom/object.h" > +#include "qemu/thread.h" > +#include "exec/hwaddr.h" > +#include "exec/cpu-common.h" > + > +#define TYPE_IOMMUFD_BACKEND "iommufd" > +OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, > + IOMMUFD_BACKEND) > +#define IOMMUFD_BACKEND(obj) \ > + OBJECT_CHECK(IOMMUFDBackend, (obj), TYPE_IOMMUFD_BACKEND) > +#define IOMMUFD_BACKEND_GET_CLASS(obj) \ > + OBJECT_GET_CLASS(IOMMUFDBackendClass, (obj), TYPE_IOMMUFD_BACKEND) > +#define IOMMUFD_BACKEND_CLASS(klass) \ > + OBJECT_CLASS_CHECK(IOMMUFDBackendClass, (klass), TYPE_IOMMUFD_BACKEND) > +struct IOMMUFDBackendClass { > + ObjectClass parent_class; > +}; > + > +struct IOMMUFDBackend { > + Object parent; > + > + /*< protected >*/ > + int fd; /* /dev/iommu file descriptor */ > + bool owned; /* is the /dev/iommu opened internally */ > + QemuMutex lock; > + uint32_t users; > + > + /*< public >*/ > +}; > + > +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); > +void iommufd_backend_disconnect(IOMMUFDBackend *be); > + > +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id); > +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas_id); > +void iommufd_backend_free_id(int fd, uint32_t id); > +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas, > + hwaddr iova, ram_addr_t size); > +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas, hwaddr iova, > + ram_addr_t size, void *vaddr, bool readonly); > +int iommufd_backend_copy_dma(IOMMUFDBackend *be, uint32_t src_ioas, > + uint32_t dst_ioas, hwaddr iova, > + ram_addr_t size, bool readonly); > +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, > + uint32_t pt_id, uint32_t *out_hwpt); > +#endif > diff --git a/qapi/qom.json b/qapi/qom.json > index fa3e88c8e6..2646ac4cca 100644 > --- a/qapi/qom.json > +++ b/qapi/qom.json > @@ -779,6 +779,18 @@ > { 'struct': 'VfioUserServerProperties', > 'data': { 'socket': 'SocketAddress', 'device': 'str' } } > > +## > +# @IOMMUFDProperties: > +# > +# Properties for IOMMUFDbackend objects. > +# > +# fd: file descriptor name > +# > +# Since: 7.2 > +## > +{ 'struct': 'IOMMUFDProperties', > + 'data': { '*fd': 'str' } } > + > ## > # @RngProperties: > # > @@ -933,6 +945,8 @@ > 'qtest', > 'rng-builtin', > 'rng-egd', > + { 'name': 'iommufd', > + 'if': 'CONFIG_IOMMUFD' }, > { 'name': 'rng-random', > 'if': 'CONFIG_POSIX' }, > 'secret', > @@ -1014,7 +1028,9 @@ > 'tls-creds-x509': 'TlsCredsX509Properties', > 'tls-cipher-suites': 'TlsCredsProperties', > 'x-remote-object': 'RemoteObjectProperties', > - 'x-vfio-user-server': 'VfioUserServerProperties' > + 'x-vfio-user-server': 'VfioUserServerProperties', > + 'iommufd': { 'type': 'IOMMUFDProperties', > + 'if': 'CONFIG_IOMMUFD' } > } } > > ## > diff --git a/qemu-options.hx b/qemu-options.hx > index 29b98c3d4c..827dd085ee 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -5098,6 +5098,19 @@ SRST > > The ``share`` boolean option is on by default with memfd. > > +#ifdef CONFIG_IOMMUFD > + ``-object iommufd,id=id[,fd=fd]`` > + Creates an iommufd backend which allows control of DMA mapping > + through the /dev/iommu device. > + > + The ``id`` parameter is a unique ID which frontends (such as > + vfio-pci of vdpa) will use to connect withe the iommufd backend. > + > + The ``fd`` parameter is an optional pre-opened file descriptor > + resulting from /dev/iommu opening. Usually the iommufd is shared > + accross all subsystems, bringing the benefit of centralized > + reference counting. > +#endif > ``-object rng-builtin,id=id`` > Creates a random number generator backend which obtains entropy > from QEMU builtin functions. The ``id`` parameter is a unique ID
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Sent: Friday, September 22, 2023 3:16 PM ... >> +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas, >hwaddr iova, >> + ram_addr_t size, void *vaddr, bool readonly) >> +{ >> + int ret; >> + struct iommu_ioas_map map = { >> + .size = sizeof(map), >> + .flags = IOMMU_IOAS_MAP_READABLE | >> + IOMMU_IOAS_MAP_FIXED_IOVA, >> + .ioas_id = ioas, >> + .__reserved = 0, >> + .user_va = (int64_t)vaddr, > >This needs an extra cast (uintptr_t) Will fix. Thanks Zhenzhong
diff --git a/MAINTAINERS b/MAINTAINERS index 6111b6b4d9..04663fbb6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2079,6 +2079,13 @@ F: hw/vfio/ap.c F: docs/system/s390x/vfio-ap.rst L: qemu-s390x@nongnu.org +iommufd +M: Yi Liu <yi.l.liu@intel.com> +M: Eric Auger <eric.auger@redhat.com> +S: Supported +F: backends/iommufd.c +F: include/sysemu/iommufd.h + vhost M: Michael S. Tsirkin <mst@redhat.com> S: Supported diff --git a/backends/Kconfig b/backends/Kconfig index f35abc1609..2cb23f62fa 100644 --- a/backends/Kconfig +++ b/backends/Kconfig @@ -1 +1,5 @@ source tpm/Kconfig + +config IOMMUFD + bool + depends on VFIO diff --git a/backends/iommufd.c b/backends/iommufd.c new file mode 100644 index 0000000000..07ea434424 --- /dev/null +++ b/backends/iommufd.c @@ -0,0 +1,291 @@ +/* + * iommufd container backend + * + * Copyright (C) 2023 Intel Corporation. + * Copyright Red Hat, Inc. 2023 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "sysemu/iommufd.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" +#include "qemu/error-report.h" +#include "monitor/monitor.h" +#include "trace.h" +#include <sys/ioctl.h> +#include <linux/iommufd.h> + +static void iommufd_backend_init(Object *obj) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + + be->fd = -1; + be->users = 0; + be->owned = true; + qemu_mutex_init(&be->lock); +} + +static void iommufd_backend_finalize(Object *obj) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + + if (be->owned) { + close(be->fd); + be->fd = -1; + } +} + +static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + int fd = -1; + + fd = monitor_fd_param(monitor_cur(), str, errp); + if (fd == -1) { + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } + qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; + qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); +} + +static void iommufd_backend_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); +} + +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) +{ + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); + if (be->users == UINT32_MAX) { + error_setg(errp, "too many connections"); + ret = -E2BIG; + goto out; + } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "/dev/iommu opening failed"); + ret = fd; + goto out; + } + be->fd = fd; + } + be->users++; +out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); + qemu_mutex_unlock(&be->lock); + return ret; +} + +void iommufd_backend_disconnect(IOMMUFDBackend *be) +{ + qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } + be->users--; + if (!be->users && be->owned) { + close(be->fd); + be->fd = -1; + } +out: + trace_iommufd_backend_disconnect(be->fd, be->users); + qemu_mutex_unlock(&be->lock); +} + +static int iommufd_backend_alloc_ioas(int fd, uint32_t *ioas) +{ + int ret; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); + if (ret) { + error_report("Failed to allocate ioas %m"); + } + + *ioas = alloc_data.out_ioas_id; + trace_iommufd_backend_alloc_ioas(fd, *ioas, ret); + + return ret; +} + +void iommufd_backend_free_id(int fd, uint32_t id) +{ + int ret; + struct iommu_destroy des = { + .size = sizeof(des), + .id = id, + }; + + ret = ioctl(fd, IOMMU_DESTROY, &des); + trace_iommufd_backend_free_id(fd, id, ret); + if (ret) { + error_report("Failed to free id: %u %m", id); + } +} + +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id) +{ + int ret; + + ret = iommufd_backend_alloc_ioas(be->fd, ioas_id); + trace_iommufd_backend_get_ioas(be->fd, *ioas_id, ret); + return ret; +} + +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas) +{ + trace_iommufd_backend_put_ioas(be->fd, ioas); + iommufd_backend_free_id(be->fd, ioas); +} + +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas, + hwaddr iova, ram_addr_t size) +{ + int ret; + struct iommu_ioas_unmap unmap = { + .size = sizeof(unmap), + .ioas_id = ioas, + .iova = iova, + .length = size, + }; + + ret = ioctl(be->fd, IOMMU_IOAS_UNMAP, &unmap); + trace_iommufd_backend_unmap_dma(be->fd, ioas, iova, size, ret); + if (ret && errno == ENOENT) { + ret = 0; + } + if (ret) { + error_report("IOMMU_IOAS_UNMAP failed: %s", strerror(errno)); + } + return !ret ? 0 : -errno; +} + +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +{ + int ret; + struct iommu_ioas_map map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .ioas_id = ioas, + .__reserved = 0, + .user_va = (int64_t)vaddr, + .iova = iova, + .length = size, + }; + + if (!readonly) { + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(be->fd, IOMMU_IOAS_MAP, &map); + trace_iommufd_backend_map_dma(be->fd, ioas, iova, size, + vaddr, readonly, ret); + if (ret) { + error_report("IOMMU_IOAS_MAP failed: %s", strerror(errno)); + } + return !ret ? 0 : -errno; +} + +int iommufd_backend_copy_dma(IOMMUFDBackend *be, uint32_t src_ioas, + uint32_t dst_ioas, hwaddr iova, + ram_addr_t size, bool readonly) +{ + int ret; + struct iommu_ioas_copy copy = { + .size = sizeof(copy), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = dst_ioas, + .src_ioas_id = src_ioas, + .length = size, + .dst_iova = iova, + .src_iova = iova, + }; + + if (!readonly) { + copy.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(be->fd, IOMMU_IOAS_COPY, ©); + trace_iommufd_backend_copy_dma(be->fd, src_ioas, dst_ioas, + iova, size, readonly, ret); + if (ret) { + error_report("IOMMU_IOAS_COPY failed: %s", strerror(errno)); + } + return !ret ? 0 : -errno; +} + +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, + uint32_t pt_id, uint32_t *out_hwpt) +{ + int ret; + struct iommu_hwpt_alloc alloc_hwpt = { + .size = sizeof(struct iommu_hwpt_alloc), + .flags = 0, + .dev_id = dev_id, + .pt_id = pt_id, + .__reserved = 0, + }; + + ret = ioctl(iommufd, IOMMU_HWPT_ALLOC, &alloc_hwpt); + trace_iommufd_backend_alloc_hwpt(iommufd, dev_id, pt_id, ret); + + if (ret) { + error_report("IOMMU_HWPT_ALLOC failed: %s", strerror(errno)); + } else { + *out_hwpt = alloc_hwpt.out_hwpt_id; + } + return !ret ? 0 : -errno; +} + +static const TypeInfo iommufd_backend_info = { + .name = TYPE_IOMMUFD_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(IOMMUFDBackend), + .instance_init = iommufd_backend_init, + .instance_finalize = iommufd_backend_finalize, + .class_size = sizeof(IOMMUFDBackendClass), + .class_init = iommufd_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&iommufd_backend_info); +} + +type_init(register_types); diff --git a/backends/meson.build b/backends/meson.build index 914c7c4afb..29dc147c8e 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -20,6 +20,9 @@ if have_vhost_user system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) endif system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) +if have_iommufd + system_ss.add(files('iommufd.c')) +endif if have_vhost_user_crypto system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) endif diff --git a/backends/trace-events b/backends/trace-events index 652eb76a57..093e3eb1da 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -5,3 +5,16 @@ dbus_vmstate_pre_save(void) dbus_vmstate_post_load(int version_id) "version_id: %d" dbus_vmstate_loading(const char *id) "id: %s" dbus_vmstate_saving(const char *id) "id: %s" + +# iommufd.c +iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" +iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" +iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" +iommufd_backend_get_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" +iommufd_backend_put_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" +iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" +iommufd_backend_copy_dma(int iommufd, uint32_t src_ioas, uint32_t dst_ioas, uint64_t iova, uint64_t size, bool readonly, int ret) " iommufd=%d src_ioas=%d dst_ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" readonly=%d (%d)" +iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" +iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" +iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u (%d)" diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h new file mode 100644 index 0000000000..f3bd212170 --- /dev/null +++ b/include/sysemu/iommufd.h @@ -0,0 +1,49 @@ +#ifndef SYSEMU_IOMMUFD_H +#define SYSEMU_IOMMUFD_H + +#include "qom/object.h" +#include "qemu/thread.h" +#include "exec/hwaddr.h" +#include "exec/cpu-common.h" + +#define TYPE_IOMMUFD_BACKEND "iommufd" +OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, + IOMMUFD_BACKEND) +#define IOMMUFD_BACKEND(obj) \ + OBJECT_CHECK(IOMMUFDBackend, (obj), TYPE_IOMMUFD_BACKEND) +#define IOMMUFD_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(IOMMUFDBackendClass, (obj), TYPE_IOMMUFD_BACKEND) +#define IOMMUFD_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(IOMMUFDBackendClass, (klass), TYPE_IOMMUFD_BACKEND) +struct IOMMUFDBackendClass { + ObjectClass parent_class; +}; + +struct IOMMUFDBackend { + Object parent; + + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ + QemuMutex lock; + uint32_t users; + + /*< public >*/ +}; + +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); +void iommufd_backend_disconnect(IOMMUFDBackend *be); + +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id); +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas_id); +void iommufd_backend_free_id(int fd, uint32_t id); +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas, + hwaddr iova, ram_addr_t size); +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +int iommufd_backend_copy_dma(IOMMUFDBackend *be, uint32_t src_ioas, + uint32_t dst_ioas, hwaddr iova, + ram_addr_t size, bool readonly); +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, + uint32_t pt_id, uint32_t *out_hwpt); +#endif diff --git a/qapi/qom.json b/qapi/qom.json index fa3e88c8e6..2646ac4cca 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -779,6 +779,18 @@ { 'struct': 'VfioUserServerProperties', 'data': { 'socket': 'SocketAddress', 'device': 'str' } } +## +# @IOMMUFDProperties: +# +# Properties for IOMMUFDbackend objects. +# +# fd: file descriptor name +# +# Since: 7.2 +## +{ 'struct': 'IOMMUFDProperties', + 'data': { '*fd': 'str' } } + ## # @RngProperties: # @@ -933,6 +945,8 @@ 'qtest', 'rng-builtin', 'rng-egd', + { 'name': 'iommufd', + 'if': 'CONFIG_IOMMUFD' }, { 'name': 'rng-random', 'if': 'CONFIG_POSIX' }, 'secret', @@ -1014,7 +1028,9 @@ 'tls-creds-x509': 'TlsCredsX509Properties', 'tls-cipher-suites': 'TlsCredsProperties', 'x-remote-object': 'RemoteObjectProperties', - 'x-vfio-user-server': 'VfioUserServerProperties' + 'x-vfio-user-server': 'VfioUserServerProperties', + 'iommufd': { 'type': 'IOMMUFDProperties', + 'if': 'CONFIG_IOMMUFD' } } } ## diff --git a/qemu-options.hx b/qemu-options.hx index 29b98c3d4c..827dd085ee 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5098,6 +5098,19 @@ SRST The ``share`` boolean option is on by default with memfd. +#ifdef CONFIG_IOMMUFD + ``-object iommufd,id=id[,fd=fd]`` + Creates an iommufd backend which allows control of DMA mapping + through the /dev/iommu device. + + The ``id`` parameter is a unique ID which frontends (such as + vfio-pci of vdpa) will use to connect withe the iommufd backend. + + The ``fd`` parameter is an optional pre-opened file descriptor + resulting from /dev/iommu opening. Usually the iommufd is shared + accross all subsystems, bringing the benefit of centralized + reference counting. +#endif ``-object rng-builtin,id=id`` Creates a random number generator backend which obtains entropy from QEMU builtin functions. The ``id`` parameter is a unique ID