Message ID | 1650646263-22047-4-git-send-email-olekstysh@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio: Solution to restrict memory access under Xen using xen-grant DMA-mapping layer | expand |
On Fri, 22 Apr 2022, Oleksandr Tyshchenko wrote: > From: Juergen Gross <jgross@suse.com> > > Introduce Xen grant DMA-mapping layer which contains special DMA-mapping > routines for providing grant references as DMA addresses to be used by > frontends (e.g. virtio) in Xen guests. > > In order to support virtio in Xen guests add a config option XEN_VIRTIO > enabling the user to specify whether in all Xen guests virtio should > be able to access memory via Xen grant mappings only on the host side. > > As this also requires providing arch_has_restricted_virtio_memory_access > implementation, switch from a pure stub to a real function on Arm > and combine with existing implementation for the SEV guests on x86. > > Add the needed functionality by providing a special set of DMA ops > handling the needed grant operations for the I/O pages. > > Signed-off-by: Juergen Gross <jgross@suse.com> > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> There are a couple of minor things that checkpatch.pl reports, but aside from those the patch looks fine to me. 
> --- > Changes RFC -> V1: > - squash with almost all changes from commit (except handling "xen,dev-domid" > property): > "[PATCH 4/6] virtio: Various updates to xen-virtio DMA ops layer" > - update commit subject/description and comments in code > - leave only single Kconfig option XEN_VIRTIO and remove architectural > dependencies > - introduce common xen_has_restricted_virtio_memory_access() in xen.h > and update arch_has_restricted_virtio_memory_access() for both > Arm and x86 to call new helper > - use (1ULL << 63) instead of 0x8000000000000000ULL for XEN_GRANT_ADDR_OFF > - implement xen_virtio_dma_map(unmap)_sg() using example in swiotlb-xen.c > - optimize padding by moving "broken" field in struct xen_virtio_data > - remove unneeded per-device spinlock > - remove the inclusion of virtio_config.h > - remane everything according to the new naming scheme: > s/virtio/grant_dma > - add new hidden config option XEN_GRANT_DMA_OPS > --- > arch/arm/xen/enlighten.c | 8 ++ > arch/x86/mm/init.c | 11 ++ > arch/x86/mm/mem_encrypt.c | 5 - > drivers/xen/Kconfig | 15 +++ > drivers/xen/Makefile | 1 + > drivers/xen/grant-dma-ops.c | 317 ++++++++++++++++++++++++++++++++++++++++++++ > include/xen/xen-ops.h | 8 ++ > include/xen/xen.h | 5 + > 8 files changed, 365 insertions(+), 5 deletions(-) > create mode 100644 drivers/xen/grant-dma-ops.c > > diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c > index ec5b082..49af493 100644 > --- a/arch/arm/xen/enlighten.c > +++ b/arch/arm/xen/enlighten.c > @@ -409,6 +409,14 @@ int __init arch_xen_unpopulated_init(struct resource **res) > } > #endif > > +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS > +int arch_has_restricted_virtio_memory_access(void) > +{ > + return xen_has_restricted_virtio_memory_access(); > +} > +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); > +#endif > + > static void __init xen_dt_guest_init(void) > { > struct device_node *xen_node; > diff --git a/arch/x86/mm/init.c 
b/arch/x86/mm/init.c > index d8cfce2..fe84a3e 100644 > --- a/arch/x86/mm/init.c > +++ b/arch/x86/mm/init.c > @@ -8,6 +8,8 @@ > #include <linux/kmemleak.h> > #include <linux/sched/task.h> > > +#include <xen/xen.h> > + > #include <asm/set_memory.h> > #include <asm/e820/api.h> > #include <asm/init.h> > @@ -1065,3 +1067,12 @@ unsigned long max_swapfile_size(void) > return pages; > } > #endif > + > +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS > +int arch_has_restricted_virtio_memory_access(void) > +{ > + return (xen_has_restricted_virtio_memory_access() || > + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); > +} > +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); > +#endif > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index 50d2099..dda020f 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -77,8 +77,3 @@ void __init mem_encrypt_init(void) > print_mem_encrypt_feature_info(); > } > > -int arch_has_restricted_virtio_memory_access(void) > -{ > - return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); > -} > -EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > index 120d32f..b95581f 100644 > --- a/drivers/xen/Kconfig > +++ b/drivers/xen/Kconfig > @@ -335,4 +335,19 @@ config XEN_UNPOPULATED_ALLOC > having to balloon out RAM regions in order to obtain physical memory > space to create such mappings. > > +config XEN_GRANT_DMA_OPS > + bool > + select DMA_OPS > + > +config XEN_VIRTIO > + bool "Xen virtio support" > + default n > + depends on VIRTIO > + select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS > + select XEN_GRANT_DMA_OPS > + help > + Enable virtio support for running as Xen guest. Depending on the > + guest type this will require special support on the backend side > + (qemu or kernel, depending on the virtio device types used). 
> + > endmenu > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile > index 5aae66e..1a23cb0 100644 > --- a/drivers/xen/Makefile > +++ b/drivers/xen/Makefile > @@ -39,3 +39,4 @@ xen-gntalloc-y := gntalloc.o > xen-privcmd-y := privcmd.o privcmd-buf.o > obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o > obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o > +obj-$(CONFIG_XEN_GRANT_DMA_OPS) += grant-dma-ops.o > diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c > new file mode 100644 > index 00000000..0e69aa8 > --- /dev/null > +++ b/drivers/xen/grant-dma-ops.c > @@ -0,0 +1,317 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/****************************************************************************** > + * Xen grant DMA-mapping layer - contains special DMA-mapping routines > + * for providing grant references as DMA addresses to be used by frontends > + * (e.g. virtio) in Xen guests > + * > + * Copyright (c) 2021, Juergen Gross <jgross@suse.com> > + */ > + > +#include <linux/module.h> > +#include <linux/dma-map-ops.h> > +#include <linux/of.h> > +#include <linux/pci.h> > +#include <linux/pfn.h> > +#include <xen/xen.h> > +#include <xen/grant_table.h> > + > +struct xen_grant_dma_data { > + /* The ID of backend domain */ > + domid_t dev_domid; > + /* Is device behaving sane? 
*/ > + bool broken; > + struct device *dev; > + struct list_head list; > +}; > + > +static LIST_HEAD(xen_grant_dma_devices); > +static DEFINE_SPINLOCK(xen_grant_dma_lock); > + > +#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63) > + > +static inline dma_addr_t grant_to_dma(grant_ref_t grant) > +{ > + return XEN_GRANT_DMA_ADDR_OFF | ((dma_addr_t)grant << PAGE_SHIFT); > +} > + > +static inline grant_ref_t dma_to_grant(dma_addr_t dma) > +{ > + return (grant_ref_t)((dma & ~XEN_GRANT_DMA_ADDR_OFF) >> PAGE_SHIFT); > +} > + > +static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev) > +{ > + struct xen_grant_dma_data *data = NULL; > + bool found = false; > + > + spin_lock(&xen_grant_dma_lock); > + > + list_for_each_entry(data, &xen_grant_dma_devices, list) { > + if (data->dev == dev) { > + found = true; > + break; > + } > + } > + > + spin_unlock(&xen_grant_dma_lock); > + > + return found ? data : NULL; > +} > + > +/* > + * DMA ops for Xen frontends (e.g. virtio). > + * > + * Used to act as a kind of software IOMMU for Xen guests by using grants as > + * DMA addresses. > + * Such a DMA address is formed by using the grant reference as a frame > + * number and setting the highest address bit (this bit is for the backend > + * to be able to distinguish it from e.g. a mmio address). > + * > + * Note that for now we hard wire dom0 to be the backend domain. In order > + * to support any domain as backend we'd need to add a way to communicate > + * the domid of this backend, e.g. via Xenstore, via the PCI-device's > + * config space or DT/ACPI. 
> + */ > +static void *xen_grant_dma_alloc(struct device *dev, size_t size, > + dma_addr_t *dma_handle, gfp_t gfp, > + unsigned long attrs) > +{ > + struct xen_grant_dma_data *data; > + unsigned int i, n_pages = PFN_UP(size); > + unsigned long pfn; > + grant_ref_t grant; > + void *ret; > + > + data = find_xen_grant_dma_data(dev); > + if (!data) > + return NULL; > + > + if (unlikely(data->broken)) > + return NULL; > + > + ret = alloc_pages_exact(n_pages * PAGE_SIZE, gfp); > + if (!ret) > + return NULL; > + > + pfn = virt_to_pfn(ret); > + > + if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) { > + free_pages_exact(ret, n_pages * PAGE_SIZE); > + return NULL; > + } > + > + for (i = 0; i < n_pages; i++) { > + gnttab_grant_foreign_access_ref(grant + i, data->dev_domid, > + pfn_to_gfn(pfn + i), 0); > + } > + > + *dma_handle = grant_to_dma(grant); > + > + return ret; > +} > + > +static void xen_grant_dma_free(struct device *dev, size_t size, void *vaddr, > + dma_addr_t dma_handle, unsigned long attrs) > +{ > + struct xen_grant_dma_data *data; > + unsigned int i, n_pages = PFN_UP(size); > + grant_ref_t grant; > + > + data = find_xen_grant_dma_data(dev); > + if (!data) > + return; > + > + if (unlikely(data->broken)) > + return; > + > + grant = dma_to_grant(dma_handle); > + > + for (i = 0; i < n_pages; i++) { > + if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) { > + dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n"); > + data->broken = true; > + return; > + } > + } > + > + gnttab_free_grant_reference_seq(grant, n_pages); > + > + free_pages_exact(vaddr, n_pages * PAGE_SIZE); > +} > + > +static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size, > + dma_addr_t *dma_handle, > + enum dma_data_direction dir, > + gfp_t gfp) > +{ > + WARN_ONCE(1, "xen_grant_dma_alloc_pages size %zu\n", size); > + return NULL; > +} > + > +static void xen_grant_dma_free_pages(struct device *dev, size_t size, > + struct page *vaddr, 
dma_addr_t dma_handle, > + enum dma_data_direction dir) > +{ > + WARN_ONCE(1, "xen_grant_dma_free_pages size %zu\n", size); > +} > + > +static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page, > + unsigned long offset, size_t size, > + enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct xen_grant_dma_data *data; > + unsigned int i, n_pages = PFN_UP(size); > + grant_ref_t grant; > + dma_addr_t dma_handle; > + > + BUG_ON(dir == DMA_NONE); > + > + data = find_xen_grant_dma_data(dev); > + if (!data) > + return DMA_MAPPING_ERROR; > + > + if (unlikely(data->broken)) > + return DMA_MAPPING_ERROR; > + > + if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) > + return DMA_MAPPING_ERROR; > + > + for (i = 0; i < n_pages; i++) { > + gnttab_grant_foreign_access_ref(grant + i, data->dev_domid, > + xen_page_to_gfn(page) + i, dir == DMA_TO_DEVICE); > + } > + > + dma_handle = grant_to_dma(grant) + offset; > + > + return dma_handle; > +} > + > +static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, > + size_t size, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct xen_grant_dma_data *data; > + unsigned int i, n_pages = PFN_UP(size); > + grant_ref_t grant; > + > + BUG_ON(dir == DMA_NONE); > + > + data = find_xen_grant_dma_data(dev); > + if (!data) > + return; > + > + if (unlikely(data->broken)) > + return; > + > + grant = dma_to_grant(dma_handle); > + > + for (i = 0; i < n_pages; i++) { > + if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) { > + dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n"); > + data->broken = true; > + return; > + } > + } > + > + gnttab_free_grant_reference_seq(grant, n_pages); > +} > + > +static void xen_grant_dma_unmap_sg(struct device *dev, struct scatterlist *sg, > + int nents, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct scatterlist *s; > + unsigned int i; > + > + BUG_ON(dir == DMA_NONE); > + > + 
for_each_sg(sg, s, nents, i) > + xen_grant_dma_unmap_page(dev, s->dma_address, sg_dma_len(s), dir, > + attrs); > +} > + > +static int xen_grant_dma_map_sg(struct device *dev, struct scatterlist *sg, > + int nents, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct scatterlist *s; > + unsigned int i; > + > + BUG_ON(dir == DMA_NONE); > + > + for_each_sg(sg, s, nents, i) { > + s->dma_address = xen_grant_dma_map_page(dev, sg_page(s), s->offset, > + s->length, dir, attrs); > + if (s->dma_address == DMA_MAPPING_ERROR) > + goto out; > + > + sg_dma_len(s) = s->length; > + } > + > + return nents; > + > +out: > + xen_grant_dma_unmap_sg(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); > + sg_dma_len(sg) = 0; > + > + return -EIO; > +} > + > +static int xen_grant_dma_supported(struct device *dev, u64 mask) > +{ > + return mask == DMA_BIT_MASK(64); > +} > + > +static const struct dma_map_ops xen_grant_dma_ops = { > + .alloc = xen_grant_dma_alloc, > + .free = xen_grant_dma_free, > + .alloc_pages = xen_grant_dma_alloc_pages, > + .free_pages = xen_grant_dma_free_pages, > + .mmap = dma_common_mmap, > + .get_sgtable = dma_common_get_sgtable, > + .map_page = xen_grant_dma_map_page, > + .unmap_page = xen_grant_dma_unmap_page, > + .map_sg = xen_grant_dma_map_sg, > + .unmap_sg = xen_grant_dma_unmap_sg, > + .dma_supported = xen_grant_dma_supported, > +}; > + > +void xen_grant_setup_dma_ops(struct device *dev) > +{ > + struct xen_grant_dma_data *data; > + uint32_t dev_domid; > + > + data = find_xen_grant_dma_data(dev); > + if (data) { > + dev_err(dev, "Xen grant DMA data is already created\n"); > + return; > + } > + > + /* XXX The dom0 is hardcoded as the backend domain for now */ > + dev_domid = 0; > + > + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); > + if (!data) { > + dev_err(dev, "Сannot allocate Xen grant DMA data\n"); > + goto err; > + } > + data->dev_domid = dev_domid; > + data->dev = dev; > + > + spin_lock(&xen_grant_dma_lock); > + list_add(&data->list, 
&xen_grant_dma_devices); > + spin_unlock(&xen_grant_dma_lock); > + > + dev->dma_ops = &xen_grant_dma_ops; > + > + return; > + > +err: > + dev_err(dev, "Сannot set up Xen grant DMA ops, retain platform DMA ops\n"); > +} > +EXPORT_SYMBOL_GPL(xen_grant_setup_dma_ops); > + > +MODULE_DESCRIPTION("Xen grant DMA-mapping layer"); > +MODULE_AUTHOR("Juergen Gross <jgross@suse.com>"); > +MODULE_LICENSE("GPL"); > diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h > index a3584a3..4f9fad5 100644 > --- a/include/xen/xen-ops.h > +++ b/include/xen/xen-ops.h > @@ -221,4 +221,12 @@ static inline void xen_preemptible_hcall_end(void) { } > > #endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */ > > +#ifdef CONFIG_XEN_GRANT_DMA_OPS > +void xen_grant_setup_dma_ops(struct device *dev); > +#else > +static inline void xen_grant_setup_dma_ops(struct device *dev) > +{ > +} > +#endif /* CONFIG_XEN_GRANT_DMA_OPS */ > + > #endif /* INCLUDE_XEN_OPS_H */ > diff --git a/include/xen/xen.h b/include/xen/xen.h > index a99bab8..fe6e6bb 100644 > --- a/include/xen/xen.h > +++ b/include/xen/xen.h > @@ -52,6 +52,11 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, > extern u64 xen_saved_max_mem_size; > #endif > > +static inline int xen_has_restricted_virtio_memory_access(void) > +{ > + return IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain(); > +} > + > #ifdef CONFIG_XEN_UNPOPULATED_ALLOC > int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); > void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); > -- > 2.7.4 > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
On 23.04.22 02:00, Stefano Stabellini wrote: Hello Stefano > On Fri, 22 Apr 2022, Oleksandr Tyshchenko wrote: >> From: Juergen Gross <jgross@suse.com> >> >> Introduce Xen grant DMA-mapping layer which contains special DMA-mapping >> routines for providing grant references as DMA addresses to be used by >> frontends (e.g. virtio) in Xen guests. >> >> In order to support virtio in Xen guests add a config option XEN_VIRTIO >> enabling the user to specify whether in all Xen guests virtio should >> be able to access memory via Xen grant mappings only on the host side. >> >> As this also requires providing arch_has_restricted_virtio_memory_access >> implementation, switch from a pure stub to a real function on Arm >> and combine with existing implementation for the SEV guests on x86. >> >> Add the needed functionality by providing a special set of DMA ops >> handling the needed grant operations for the I/O pages. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > There are a couple of minor things that checkpatch.pl reports, Thank you for pointing this out, my fault. > but aside > from those the patch looks fine to me. good The attached diff to be squashed for the new version. One thing remains: checkpatch.pl says regarding drivers/xen/grant-dma-ops.c: WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? #151: new file mode 100644 Which, I assume, this is not an issue as new file falls under XEN HYPERVISOR INTERFACE maintainership? scripts/get_maintainer.pl -f drivers/xen/grant-dma-ops.c Boris Ostrovsky <boris.ostrovsky@oracle.com> (supporter:XEN HYPERVISOR INTERFACE) Juergen Gross <jgross@suse.com> (supporter:XEN HYPERVISOR INTERFACE) Stefano Stabellini <sstabellini@kernel.org> (reviewer:XEN HYPERVISOR INTERFACE) xen-devel@lists.xenproject.org (moderated list:XEN HYPERVISOR INTERFACE) linux-kernel@vger.kernel.org (open list) [snip]
On 23.04.22 09:05, Oleksandr wrote: > > On 23.04.22 02:00, Stefano Stabellini wrote: > > Hello Stefano > > >> On Fri, 22 Apr 2022, Oleksandr Tyshchenko wrote: >>> From: Juergen Gross <jgross@suse.com> >>> >>> Introduce Xen grant DMA-mapping layer which contains special DMA-mapping >>> routines for providing grant references as DMA addresses to be used by >>> frontends (e.g. virtio) in Xen guests. >>> >>> In order to support virtio in Xen guests add a config option XEN_VIRTIO >>> enabling the user to specify whether in all Xen guests virtio should >>> be able to access memory via Xen grant mappings only on the host side. >>> >>> As this also requires providing arch_has_restricted_virtio_memory_access >>> implementation, switch from a pure stub to a real function on Arm >>> and combine with existing implementation for the SEV guests on x86. >>> >>> Add the needed functionality by providing a special set of DMA ops >>> handling the needed grant operations for the I/O pages. >>> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >> There are a couple of minor things that checkpatch.pl reports, > > Thank you for pointing this out, my fault. > > >> but aside >> from those the patch looks fine to me. > > good > > > The attached diff to be squashed for the new version. One thing remains: > > checkpatch.pl says regarding drivers/xen/grant-dma-ops.c: > > WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? > #151: > new file mode 100644 > > > Which, I assume, this is not an issue as new file falls under XEN HYPERVISOR > INTERFACE maintainership? Yes. Juergen
On 23.04.22 12:10, Juergen Gross wrote: Hello Juergen > On 23.04.22 09:05, Oleksandr wrote: >> >> On 23.04.22 02:00, Stefano Stabellini wrote: >> >> Hello Stefano >> >> >>> On Fri, 22 Apr 2022, Oleksandr Tyshchenko wrote: >>>> From: Juergen Gross <jgross@suse.com> >>>> >>>> Introduce Xen grant DMA-mapping layer which contains special >>>> DMA-mapping >>>> routines for providing grant references as DMA addresses to be used by >>>> frontends (e.g. virtio) in Xen guests. >>>> >>>> In order to support virtio in Xen guests add a config option >>>> XEN_VIRTIO >>>> enabling the user to specify whether in all Xen guests virtio should >>>> be able to access memory via Xen grant mappings only on the host side. >>>> >>>> As this also requires providing >>>> arch_has_restricted_virtio_memory_access >>>> implementation, switch from a pure stub to a real function on Arm >>>> and combine with existing implementation for the SEV guests on x86. >>>> >>>> Add the needed functionality by providing a special set of DMA ops >>>> handling the needed grant operations for the I/O pages. >>>> >>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >>> There are a couple of minor things that checkpatch.pl reports, >> >> Thank you for pointing this out, my fault. >> >> >>> but aside >>> from those the patch looks fine to me. >> >> good >> >> >> The attached diff to be squashed for the new version. One thing remains: >> >> checkpatch.pl says regarding drivers/xen/grant-dma-ops.c: >> >> WARNING: added, moved or deleted file(s), does MAINTAINERS need >> updating? >> #151: >> new file mode 100644 >> >> >> Which, I assume, this is not an issue as new file falls under XEN >> HYPERVISOR INTERFACE maintainership? > > Yes. ok, thank you for the confirmation. > > > Juergen
Please split this into one patch that creates grant-dma-ops, and another that sets up the virtio restricted access helpers. > + > +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS > +int arch_has_restricted_virtio_memory_access(void) > +{ > + return (xen_has_restricted_virtio_memory_access() || > + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); > +} So instead of hardcoding Xen here, this seems like a candidate for another cc_platform_has flag. > +config XEN_VIRTIO > + bool "Xen virtio support" > + default n n is the default default, so no need to specify it. > +// SPDX-License-Identifier: GPL-2.0-only > +/****************************************************************************** The all * line is not the usual kernel style, I'd suggest to drop it. > +static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size, > + dma_addr_t *dma_handle, > + enum dma_data_direction dir, > + gfp_t gfp) > +{ > + WARN_ONCE(1, "xen_grant_dma_alloc_pages size %zu\n", size); > + return NULL; > +} > + > +static void xen_grant_dma_free_pages(struct device *dev, size_t size, > + struct page *vaddr, dma_addr_t dma_handle, > + enum dma_data_direction dir) > +{ > + WARN_ONCE(1, "xen_grant_dma_free_pages size %zu\n", size); > +} Please just wire this up to the same implementation as .alloc and .free. > + spin_lock(&xen_grant_dma_lock); > + list_add(&data->list, &xen_grant_dma_devices); > + spin_unlock(&xen_grant_dma_lock); Hmm, having to do this device lookup for every DMA operation is going to suck. It might make sense to add a private field (e.g. as a union with the iommu field) in struct device instead. But if not you probably want to switch to a more efficient data structure like the xarray at least. > +EXPORT_SYMBOL_GPL(xen_grant_setup_dma_ops); I don't think this has any modular users, or did I miss something?
On 23.04.22 19:40, Christoph Hellwig wrote: Hello Christoph > Please split this into one patch that creates grant-dma-ops, and another > that sets up the virtio restricted access helpers. Sounds reasonable, will do: 1. grant-dma-ops.c with config XEN_GRANT_DMA_OPS 2. arch_has_restricted_virtio_memory_access() with config XEN_VIRTIO > >> + >> +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS >> +int arch_has_restricted_virtio_memory_access(void) >> +{ >> + return (xen_has_restricted_virtio_memory_access() || >> + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); >> +} > So instead of hardcoding Xen here, this seems like a candidate for > another cc_platform_has flag. I have a limited knowledge of x86 and Xen on x86. Would the Xen specific bits fit into Confidential Computing Platform checks? I will let Juergen/Boris comment on this. > >> +config XEN_VIRTIO >> + bool "Xen virtio support" >> + default n > n is the default default, so no need to specify it. ok, will drop > >> +// SPDX-License-Identifier: GPL-2.0-only >> +/****************************************************************************** > The all * line is not the usual kernel style, I'd suggest to drop it. ok, will drop > >> +static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size, >> + dma_addr_t *dma_handle, >> + enum dma_data_direction dir, >> + gfp_t gfp) >> +{ >> + WARN_ONCE(1, "xen_grant_dma_alloc_pages size %zu\n", size); >> + return NULL; >> +} >> + >> +static void xen_grant_dma_free_pages(struct device *dev, size_t size, >> + struct page *vaddr, dma_addr_t dma_handle, >> + enum dma_data_direction dir) >> +{ >> + WARN_ONCE(1, "xen_grant_dma_free_pages size %zu\n", size); >> +} > Please just wire this up to the same implementation as .alloc and .free. 
I got it, will implement > >> + spin_lock(&xen_grant_dma_lock); >> + list_add(&data->list, &xen_grant_dma_devices); >> + spin_unlock(&xen_grant_dma_lock); > Hmm, having to do this device lookup for every DMA operation is going > to suck. It might make sense to add a private field (e.g. as a union > with the iommu field) in struct device instead. I was thinking about it, but decided to not alter common struct device for adding Xen specific field, but haven't managed to think of a better idea than just using that brute lookup ... > > But if not you probably want to switch to a more efficient data > structure like the xarray at least. ... I think, this is good point, thank you. I have no idea how faster it is going to be, but the resulting code looks simple (if of course I correctly understood the usage of xarray) diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index a512c0a..7ecc0b0 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -11,6 +11,7 @@ #include <linux/dma-map-ops.h> #include <linux/of.h> #include <linux/pfn.h> +#include <linux/xarray.h> #include <xen/xen.h> #include <xen/grant_table.h> @@ -19,12 +20,9 @@ struct xen_grant_dma_data { domid_t dev_domid; /* Is device behaving sane? 
*/ bool broken; - struct device *dev; - struct list_head list; }; -static LIST_HEAD(xen_grant_dma_devices); -static DEFINE_SPINLOCK(xen_grant_dma_lock); +static DEFINE_XARRAY(xen_grant_dma_devices); #define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63) @@ -40,21 +38,13 @@ static inline grant_ref_t dma_to_grant(dma_addr_t dma) static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev) { - struct xen_grant_dma_data *data = NULL; - bool found = false; - - spin_lock(&xen_grant_dma_lock); - - list_for_each_entry(data, &xen_grant_dma_devices, list) { - if (data->dev == dev) { - found = true; - break; - } - } + struct xen_grant_dma_data *data; - spin_unlock(&xen_grant_dma_lock); + xa_lock(&xen_grant_dma_devices); + data = xa_load(&xen_grant_dma_devices, (unsigned long)dev); + xa_unlock(&xen_grant_dma_devices); - return found ? data : NULL; + return data; } /* @@ -310,11 +300,12 @@ void xen_grant_setup_dma_ops(struct device *dev) goto err; data->dev_domid = dev_domid; - data->dev = dev; - spin_lock(&xen_grant_dma_lock); - list_add(&data->list, &xen_grant_dma_devices); - spin_unlock(&xen_grant_dma_lock); + if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data, + GFP_KERNEL))) { + dev_err(dev, "Cannot store Xen grant DMA data\n"); + goto err; + } dev->dma_ops = &xen_grant_dma_ops; > >> +EXPORT_SYMBOL_GPL(xen_grant_setup_dma_ops); > I don't think this has any modular users, or did I miss something? No, you didn't. Will drop here and in the next patch for xen_is_grant_dma_device() as well.
On 4/24/22 12:53 PM, Oleksandr wrote: > > On 23.04.22 19:40, Christoph Hellwig wrote: > > > > >> >>> + >>> +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS >>> +int arch_has_restricted_virtio_memory_access(void) >>> +{ >>> + return (xen_has_restricted_virtio_memory_access() || >>> + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); >>> +} >> So instead of hardcoding Xen here, this seems like a candidate for >> another cc_platform_has flag. > > > I have a limited knowledge of x86 and Xen on x86. > > Would the Xen specific bits fit into Confidential Computing Platform checks? I will let Juergen/Boris comment on this. > This is unrelated to confidential so I don't think we can add another CC_ flag. Would arch/x86/kernel/cpu/hypervisor.c be a better home for this? -boris
On 24.04.22 18:53, Oleksandr wrote: > > On 23.04.22 19:40, Christoph Hellwig wrote: > > > Hello Christoph > >> Please split this into one patch that creates grant-dma-ops, and another >> that sets up the virtio restricted access helpers. > > > Sounds reasonable, will do: > > 1. grant-dma-ops.c with config XEN_GRANT_DMA_OPS > > 2. arch_has_restricted_virtio_memory_access() with config XEN_VIRTIO > > >> >>> + >>> +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS >>> +int arch_has_restricted_virtio_memory_access(void) >>> +{ >>> + return (xen_has_restricted_virtio_memory_access() || >>> + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); >>> +} >> So instead of hardcoding Xen here, this seems like a candidate for >> another cc_platform_has flag. > > > I have a limited knowledge of x86 and Xen on x86. > > Would the Xen specific bits fit into Confidential Computing Platform checks? I > will let Juergen/Boris comment on this. I don't think cc_platform_has would be correct here. Xen certainly provides more isolation between guests and dom0, but "Confidential Computing" is basically orthogonal to that feature. Juergen
On 24.04.22 20:08, Boris Ostrovsky wrote: > > On 4/24/22 12:53 PM, Oleksandr wrote: >> >> On 23.04.22 19:40, Christoph Hellwig wrote: >> >> >> >> >>> >>>> + >>>> +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS >>>> +int arch_has_restricted_virtio_memory_access(void) >>>> +{ >>>> + return (xen_has_restricted_virtio_memory_access() || >>>> + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); >>>> +} >>> So instead of hardcoding Xen here, this seems like a candidate for >>> another cc_platform_has flag. >> >> >> I have a limited knowledge of x86 and Xen on x86. >> >> Would the Xen specific bits fit into Confidential Computing Platform checks? I >> will let Juergen/Boris comment on this. >> > > This is unrelated to confidential so I don't think we can add another CC_ flag. > > > Would arch/x86/kernel/cpu/hypervisor.c be a better home for this? Or a callback in struct struct x86_hyper_runtime maybe? Juergen
On Mon, Apr 25, 2022 at 09:47:49AM +0200, Juergen Gross wrote: > > Would the Xen specific bits fit into Confidential Computing Platform > > checks? I will let Juergen/Boris comment on this. > > I don't think cc_platform_has would be correct here. Xen certainly > provides more isolation between guests and dom0, but "Confidential > Computing" is basically orthogonal to that feature. The point of cc_platform_has is to remove all these open code checks. If a Xen hypervisor / dom0 can't access arbitrary guest memory for virtual I/O and we need special APIs for that it certainly falls into the scope of cc_platform_has, even if the confidentiality is rather limited.
On 25.04.22 09:58, Christoph Hellwig wrote: > On Mon, Apr 25, 2022 at 09:47:49AM +0200, Juergen Gross wrote: >>> Would the Xen specific bits fit into Confidential Computing Platform >>> checks? I will let Juergen/Boris comment on this. >> >> I don't think cc_platform_has would be correct here. Xen certainly >> provides more isolation between guests and dom0, but "Confidential >> Computing" is basically orthogonal to that feature. > > The point of cc_platform_has is to remove all these open code checks. > If a Xen hypervisor / dom0 can't access arbitrary guest memory for > virtual I/O and we need special APIs for that it certainly false > into the scope of cc_platform_has, even if the confientiality is > rather limited. In case the x86 maintainers are fine with that I won't oppose. Juergen
Hello all. On 25.04.22 12:14, Juergen Gross wrote: > On 25.04.22 09:58, Christoph Hellwig wrote: >> On Mon, Apr 25, 2022 at 09:47:49AM +0200, Juergen Gross wrote: >>>> Would the Xen specific bits fit into Confidential Computing Platform >>>> checks? I will let Juergen/Boris comment on this. >>> >>> I don't think cc_platform_has would be correct here. Xen certainly >>> provides more isolation between guests and dom0, but "Confidential >>> Computing" is basically orthogonal to that feature. >> >> The point of cc_platform_has is to remove all these open code checks. >> If a Xen hypervisor / dom0 can't access arbitrary guest memory for >> virtual I/O and we need special APIs for that it certainly false >> into the scope of cc_platform_has, even if the confientiality is >> rather limited. > > In case the x86 maintainers are fine with that I won't oppose. > > > Juergen [I have discussed with Juergen on IRC about it.] Well, if cc_platform_has() is a way to go (at least on x86), below some thoughts about possible integration (if, of course, I got the idea and code correctly). 1. We will need to introduce new attribute CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED as we can't reuse CC_ATTR_GUEST_MEM_ENCRYPT (in case of Xen the Guest memory is not encrypted). New attribute is automatically set if Guest memory encryption is active (CC_ATTR_GUEST_MEM_ENCRYPT is set). Also new attribute is set if restricted memory access using Xen grant mappings is active. This will allow us to have a single check in arch_has_restricted_virtio_memory_access() which covers both cases: Xen and SEV int arch_has_restricted_virtio_memory_access(void) { return cc_platform_has(CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED); } 2. We will need to introduce new vendor CC_VENDOR_XXX for our case (I have chosen XEN, although I am not sure it is a good fit) which deals with new attribute only. 3. 
Xen code then will call cc_set_vendor(CC_VENDOR_XEN) for different modes (PV, HVM, etc) during initialization if restricted memory access using Xen grant mappings is enabled. Below the diff (not tested and without x86's PVH) how it could look like: diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index ec5b082..0284aa7 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -409,6 +409,14 @@ int __init arch_xen_unpopulated_init(struct resource **res) } #endif +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS +int arch_has_restricted_virtio_memory_access(void) +{ + return IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain(); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); +#endif + static void __init xen_dt_guest_init(void) { struct device_node *xen_node; diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index fc1365d..9020a60 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -44,6 +44,7 @@ static bool amd_cc_platform_has(enum cc_attr attr) return sme_me_mask && !(sev_status & MSR_AMD64_SEV_ENABLED); case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: return sev_status & MSR_AMD64_SEV_ENABLED; case CC_ATTR_GUEST_STATE_ENCRYPT: @@ -67,7 +68,19 @@ static bool amd_cc_platform_has(enum cc_attr attr) static bool hyperv_cc_platform_has(enum cc_attr attr) { - return attr == CC_ATTR_GUEST_MEM_ENCRYPT; + switch (attr) { + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: + return true; + + default: + return false; + } +} + +static bool xen_cc_platform_has(enum cc_attr attr) +{ + return attr == CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED; } bool cc_platform_has(enum cc_attr attr) @@ -79,6 +92,8 @@ bool cc_platform_has(enum cc_attr attr) return intel_cc_platform_has(attr); case CC_VENDOR_HYPERV: return hyperv_cc_platform_has(attr); + case CC_VENDOR_XEN: + return xen_cc_platform_has(attr); default: return false; } @@ -115,3 +130,11 @@ __init void cc_set_mask(u64 mask) 
{ cc_mask = mask; } + +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS +int arch_has_restricted_virtio_memory_access(void) +{ + return cc_platform_has(CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); +#endif diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a..6395ec1 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -9,6 +9,7 @@ enum cc_vendor { CC_VENDOR_AMD, CC_VENDOR_HYPERV, CC_VENDOR_INTEL, + CC_VENDOR_XEN, }; void cc_set_vendor(enum cc_vendor v); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 50d2099..dda020f 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -77,8 +77,3 @@ void __init mem_encrypt_init(void) print_mem_encrypt_feature_info(); } -int arch_has_restricted_virtio_memory_access(void) -{ - return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); -} -EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 85246dd..79cb30f 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,6 +8,7 @@ config XEN depends on PARAVIRT select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR + select ARCH_HAS_CC_PLATFORM depends on X86_64 || (X86_32 && X86_PAE) depends on X86_LOCAL_APIC && X86_TSC help diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 517a9d8..11c3f4e 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -195,6 +195,9 @@ static void __init xen_hvm_guest_init(void) if (xen_pv_domain()) return; + if (IS_ENABLED(CONFIG_XEN_VIRTIO)) + cc_set_vendor(CC_VENDOR_XEN); + init_hvm_pv_info(); reserve_shared_info(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5038edb..2fe5aaa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -109,6 +109,9 @@ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); static void 
__init xen_pv_init_platform(void) { + if (IS_ENABLED(CONFIG_XEN_VIRTIO)) + cc_set_vendor(CC_VENDOR_XEN); + populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 313a9127..d3179f8 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -339,4 +339,16 @@ config XEN_GRANT_DMA_OPS bool select DMA_OPS +config XEN_VIRTIO + bool "Xen virtio support" + depends on VIRTIO + select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS + select XEN_GRANT_DMA_OPS + help + Enable virtio support for running as Xen guest. Depending on the + guest type this will require special support on the backend side + (qemu or kernel, depending on the virtio device types used). + + If in doubt, say n. + endmenu diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index efd8205..d06bc7a 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -72,6 +72,19 @@ enum cc_attr { * Examples include TDX guest & SEV. */ CC_ATTR_GUEST_UNROLL_STRING_IO, + + /** + * @CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: Restricted memory access to + * Guest memory is active + * + * The platform/OS is running as a guest/virtual machine and uses + * the restricted access to its memory. This attribute is set if either + * Guest memory encryption or restricted memory access using Xen grant + * mappings is active. + * + * Examples include Xen guest and SEV. + */ + CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM (END) On Arm I left simple variant simply because of no users of cc_platform. 
int arch_has_restricted_virtio_memory_access(void) { return IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain(); } But, we could have something simple here: bool cc_platform_has(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: return IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain(); default: return false; } } int arch_has_restricted_virtio_memory_access(void) { return cc_platform_has(CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED); } Any thoughts?
On Mon, Apr 25, 2022 at 11:38:36PM +0300, Oleksandr wrote: > diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h > index efd8205..d06bc7a 100644 > --- a/include/linux/cc_platform.h > +++ b/include/linux/cc_platform.h > @@ -72,6 +72,19 @@ enum cc_attr { > * Examples include TDX guest & SEV. > */ > CC_ATTR_GUEST_UNROLL_STRING_IO, > + > + /** > + * @CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: Restricted memory access to > + * Guest memory is active > + * > + * The platform/OS is running as a guest/virtual machine and uses > + * the restricted access to its memory. This attribute is set if > either > + * Guest memory encryption or restricted memory access using Xen > grant > + * mappings is active. > + * > + * Examples include Xen guest and SEV. Wait, whaaat? The cc_platform* stuff is for *confidential computing* guests to check different platform aspects. From quickly skimming over this, this looks like a misuse to me. Why can't you query this from the hypervisor just like you do your other querying about what is supported, etc? Hypercalls, CPUID, whatever...
On 25.04.22 23:25, Borislav Petkov wrote: > On Mon, Apr 25, 2022 at 11:38:36PM +0300, Oleksandr wrote: >> diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h >> index efd8205..d06bc7a 100644 >> --- a/include/linux/cc_platform.h >> +++ b/include/linux/cc_platform.h >> @@ -72,6 +72,19 @@ enum cc_attr { >> * Examples include TDX guest & SEV. >> */ >> CC_ATTR_GUEST_UNROLL_STRING_IO, >> + >> + /** >> + * @CC_ATTR_GUEST_MEM_ACCESS_RESTRICTED: Restricted memory access to >> + * Guest memory is active >> + * >> + * The platform/OS is running as a guest/virtual machine and uses >> + * the restricted access to its memory. This attribute is set if >> either >> + * Guest memory encryption or restricted memory access using Xen >> grant >> + * mappings is active. >> + * >> + * Examples include Xen guest and SEV. > > Wait, whaaat? > > The cc_platform* stuff is for *confidential computing* guests to check > different platform aspects. > > From quickly skimming over this, this looks like a misuse to me. Christoph suggested (rather firmly) this would be the way to go. > > Why can't you query this from the hypervisor just like you do your other > querying about what is supported, etc? Hypercalls, CPUID, whatever... This is needed on guest side at a rather hypervisor independent place. So a capability of some sort seems appropriate. Another suggestion of mine was to have a callback (or flag) in struct x86_hyper_runtime for that purpose. Juergen
On Tue, Apr 26, 2022 at 07:16:16AM +0200, Juergen Gross wrote: > Christoph suggested (rather firmly) this would be the way to go. Yeah, I saw it but I don't think it is the right way to go. What happens the next time a guest needs to query the platform underneath? Misuse these interfaces again? Because people will see the Xen use and say, hey, look, I will use this for my funky HV too. Even worse: what happens if Xen decides to implement SEV/TDX? Then you're in for a world of fun. Now, if we want to *extend* the interfaces to have something as generic as, say, platform_has() and that should be the way for generic kernel code running in the guest to query the platform capabilities, then sure, by all means. > This is needed on guest side at a rather hypervisor independent place. > > So a capability of some sort seems appropriate. > > Another suggestion of mine was to have a callback (or flag) in > struct x86_hyper_runtime for that purpose. This becomes an issue if the HV is not x86 - then you need a different method of querying it, which then underneath will call the arch-specific interface. I don't know how much of querying guests need to do and how they've been doing that so far. Depending on the requirements, we probably should think about a clean design from the get-go instead of homegrown things. Thx.
On 26.04.22 10:41, Borislav Petkov wrote: > On Tue, Apr 26, 2022 at 07:16:16AM +0200, Juergen Gross wrote: >> Christoph suggested (rather firmly) this would be the way to go. > > Yeah, I saw it but I don't think it is the right way to go. > > What happens the next time a guest needs to query the platform > underneath? Misuse these interfaces again? > > Because people will see the Xen use and say, hey, look, I will use this > for my funky HV too. > > Even worse: what happens if Xen decides to implement SEV/TDX? Then > you're in for a world of fun. As the suggestion was to add another flag this wouldn't be a problem IMO. But I agree that coco might be not the best way to go (as I wrote already). > > Now, if we want to *extend* the interfaces to have something as generic > as, say, platform_has() and that should be the way for generic kernel > code running in the guest to query the platform capabilities, then sure, > by all means. I agree. > >> This is needed on guest side at a rather hypervisor independent place. >> >> So a capability of some sort seems appropriate. >> >> Another suggestion of mine was to have a callback (or flag) in >> struct x86_hyper_runtime for that purpose. > > This becomes an issue if the HV is not x86 - then you need a different > method of querying it, which then underneath will call the arch-specific > interface. > > I don't know how much of querying guests need to do and how they've been > doing that so far. Depending on the requirements, we probably should > think about a clean design from the get-go instead of homegrown things. Yes. platform_has() doesn't seem too bad IMO. I will write a patch for starting the discussion. Juergen
On Tue, Apr 26, 2022 at 11:36:40AM +0200, Juergen Gross wrote: > As the suggestion was to add another flag this wouldn't be a problem IMO. We had a problem already with adding one flag would break the same flag on the other guest type. That's why we added cc_vendor too. So it can be tricky. > platform_has() doesn't seem too bad IMO. > > I will write a patch for starting the discussion. Yeah, I guess such a proposal would need a wider audience - maybe CC linux-arch... Thx.
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index ec5b082..49af493 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -409,6 +409,14 @@ int __init arch_xen_unpopulated_init(struct resource **res) } #endif +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS +int arch_has_restricted_virtio_memory_access(void) +{ + return xen_has_restricted_virtio_memory_access(); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); +#endif + static void __init xen_dt_guest_init(void) { struct device_node *xen_node; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d8cfce2..fe84a3e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -8,6 +8,8 @@ #include <linux/kmemleak.h> #include <linux/sched/task.h> +#include <xen/xen.h> + #include <asm/set_memory.h> #include <asm/e820/api.h> #include <asm/init.h> @@ -1065,3 +1067,12 @@ unsigned long max_swapfile_size(void) return pages; } #endif + +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS +int arch_has_restricted_virtio_memory_access(void) +{ + return (xen_has_restricted_virtio_memory_access() || + cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); +#endif diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 50d2099..dda020f 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -77,8 +77,3 @@ void __init mem_encrypt_init(void) print_mem_encrypt_feature_info(); } -int arch_has_restricted_virtio_memory_access(void) -{ - return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); -} -EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 120d32f..b95581f 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -335,4 +335,19 @@ config XEN_UNPOPULATED_ALLOC having to balloon out RAM regions in order to obtain physical memory space to create such mappings. 
+config XEN_GRANT_DMA_OPS + bool + select DMA_OPS + +config XEN_VIRTIO + bool "Xen virtio support" + default n + depends on VIRTIO + select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS + select XEN_GRANT_DMA_OPS + help + Enable virtio support for running as Xen guest. Depending on the + guest type this will require special support on the backend side + (qemu or kernel, depending on the virtio device types used). + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 5aae66e..1a23cb0 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -39,3 +39,4 @@ xen-gntalloc-y := gntalloc.o xen-privcmd-y := privcmd.o privcmd-buf.o obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o +obj-$(CONFIG_XEN_GRANT_DMA_OPS) += grant-dma-ops.o diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c new file mode 100644 index 00000000..0e69aa8 --- /dev/null +++ b/drivers/xen/grant-dma-ops.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0-only +/****************************************************************************** + * Xen grant DMA-mapping layer - contains special DMA-mapping routines + * for providing grant references as DMA addresses to be used by frontends + * (e.g. virtio) in Xen guests + * + * Copyright (c) 2021, Juergen Gross <jgross@suse.com> + */ + +#include <linux/module.h> +#include <linux/dma-map-ops.h> +#include <linux/of.h> +#include <linux/pci.h> +#include <linux/pfn.h> +#include <xen/xen.h> +#include <xen/grant_table.h> + +struct xen_grant_dma_data { + /* The ID of backend domain */ + domid_t dev_domid; + /* Is device behaving sane? 
*/ + bool broken; + struct device *dev; + struct list_head list; +}; + +static LIST_HEAD(xen_grant_dma_devices); +static DEFINE_SPINLOCK(xen_grant_dma_lock); + +#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63) + +static inline dma_addr_t grant_to_dma(grant_ref_t grant) +{ + return XEN_GRANT_DMA_ADDR_OFF | ((dma_addr_t)grant << PAGE_SHIFT); +} + +static inline grant_ref_t dma_to_grant(dma_addr_t dma) +{ + return (grant_ref_t)((dma & ~XEN_GRANT_DMA_ADDR_OFF) >> PAGE_SHIFT); +} + +static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev) +{ + struct xen_grant_dma_data *data = NULL; + bool found = false; + + spin_lock(&xen_grant_dma_lock); + + list_for_each_entry(data, &xen_grant_dma_devices, list) { + if (data->dev == dev) { + found = true; + break; + } + } + + spin_unlock(&xen_grant_dma_lock); + + return found ? data : NULL; +} + +/* + * DMA ops for Xen frontends (e.g. virtio). + * + * Used to act as a kind of software IOMMU for Xen guests by using grants as + * DMA addresses. + * Such a DMA address is formed by using the grant reference as a frame + * number and setting the highest address bit (this bit is for the backend + * to be able to distinguish it from e.g. a mmio address). + * + * Note that for now we hard wire dom0 to be the backend domain. In order + * to support any domain as backend we'd need to add a way to communicate + * the domid of this backend, e.g. via Xenstore, via the PCI-device's + * config space or DT/ACPI. 
+ */ +static void *xen_grant_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) +{ + struct xen_grant_dma_data *data; + unsigned int i, n_pages = PFN_UP(size); + unsigned long pfn; + grant_ref_t grant; + void *ret; + + data = find_xen_grant_dma_data(dev); + if (!data) + return NULL; + + if (unlikely(data->broken)) + return NULL; + + ret = alloc_pages_exact(n_pages * PAGE_SIZE, gfp); + if (!ret) + return NULL; + + pfn = virt_to_pfn(ret); + + if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) { + free_pages_exact(ret, n_pages * PAGE_SIZE); + return NULL; + } + + for (i = 0; i < n_pages; i++) { + gnttab_grant_foreign_access_ref(grant + i, data->dev_domid, + pfn_to_gfn(pfn + i), 0); + } + + *dma_handle = grant_to_dma(grant); + + return ret; +} + +static void xen_grant_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct xen_grant_dma_data *data; + unsigned int i, n_pages = PFN_UP(size); + grant_ref_t grant; + + data = find_xen_grant_dma_data(dev); + if (!data) + return; + + if (unlikely(data->broken)) + return; + + grant = dma_to_grant(dma_handle); + + for (i = 0; i < n_pages; i++) { + if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) { + dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n"); + data->broken = true; + return; + } + } + + gnttab_free_grant_reference_seq(grant, n_pages); + + free_pages_exact(vaddr, n_pages * PAGE_SIZE); +} + +static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, + enum dma_data_direction dir, + gfp_t gfp) +{ + WARN_ONCE(1, "xen_grant_dma_alloc_pages size %zu\n", size); + return NULL; +} + +static void xen_grant_dma_free_pages(struct device *dev, size_t size, + struct page *vaddr, dma_addr_t dma_handle, + enum dma_data_direction dir) +{ + WARN_ONCE(1, "xen_grant_dma_free_pages size %zu\n", size); +} + +static dma_addr_t 
xen_grant_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct xen_grant_dma_data *data; + unsigned int i, n_pages = PFN_UP(size); + grant_ref_t grant; + dma_addr_t dma_handle; + + BUG_ON(dir == DMA_NONE); + + data = find_xen_grant_dma_data(dev); + if (!data) + return DMA_MAPPING_ERROR; + + if (unlikely(data->broken)) + return DMA_MAPPING_ERROR; + + if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) + return DMA_MAPPING_ERROR; + + for (i = 0; i < n_pages; i++) { + gnttab_grant_foreign_access_ref(grant + i, data->dev_domid, + xen_page_to_gfn(page) + i, dir == DMA_TO_DEVICE); + } + + dma_handle = grant_to_dma(grant) + offset; + + return dma_handle; +} + +static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct xen_grant_dma_data *data; + unsigned int i, n_pages = PFN_UP(size); + grant_ref_t grant; + + BUG_ON(dir == DMA_NONE); + + data = find_xen_grant_dma_data(dev); + if (!data) + return; + + if (unlikely(data->broken)) + return; + + grant = dma_to_grant(dma_handle); + + for (i = 0; i < n_pages; i++) { + if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) { + dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n"); + data->broken = true; + return; + } + } + + gnttab_free_grant_reference_seq(grant, n_pages); +} + +static void xen_grant_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *s; + unsigned int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sg, s, nents, i) + xen_grant_dma_unmap_page(dev, s->dma_address, sg_dma_len(s), dir, + attrs); +} + +static int xen_grant_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *s; + unsigned int i; + + 
BUG_ON(dir == DMA_NONE); + + for_each_sg(sg, s, nents, i) { + s->dma_address = xen_grant_dma_map_page(dev, sg_page(s), s->offset, + s->length, dir, attrs); + if (s->dma_address == DMA_MAPPING_ERROR) + goto out; + + sg_dma_len(s) = s->length; + } + + return nents; + +out: + xen_grant_dma_unmap_sg(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + sg_dma_len(sg) = 0; + + return -EIO; +} + +static int xen_grant_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} + +static const struct dma_map_ops xen_grant_dma_ops = { + .alloc = xen_grant_dma_alloc, + .free = xen_grant_dma_free, + .alloc_pages = xen_grant_dma_alloc_pages, + .free_pages = xen_grant_dma_free_pages, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .map_page = xen_grant_dma_map_page, + .unmap_page = xen_grant_dma_unmap_page, + .map_sg = xen_grant_dma_map_sg, + .unmap_sg = xen_grant_dma_unmap_sg, + .dma_supported = xen_grant_dma_supported, +}; + +void xen_grant_setup_dma_ops(struct device *dev) +{ + struct xen_grant_dma_data *data; + uint32_t dev_domid; + + data = find_xen_grant_dma_data(dev); + if (data) { + dev_err(dev, "Xen grant DMA data is already created\n"); + return; + } + + /* XXX The dom0 is hardcoded as the backend domain for now */ + dev_domid = 0; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(dev, "Cannot allocate Xen grant DMA data\n"); + goto err; + } + data->dev_domid = dev_domid; + data->dev = dev; + + spin_lock(&xen_grant_dma_lock); + list_add(&data->list, &xen_grant_dma_devices); + spin_unlock(&xen_grant_dma_lock); + + dev->dma_ops = &xen_grant_dma_ops; + + return; + +err: + dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n"); +} +EXPORT_SYMBOL_GPL(xen_grant_setup_dma_ops); + +MODULE_DESCRIPTION("Xen grant DMA-mapping layer"); +MODULE_AUTHOR("Juergen Gross <jgross@suse.com>"); +MODULE_LICENSE("GPL"); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 
a3584a3..4f9fad5 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -221,4 +221,12 @@ static inline void xen_preemptible_hcall_end(void) { } #endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */ +#ifdef CONFIG_XEN_GRANT_DMA_OPS +void xen_grant_setup_dma_ops(struct device *dev); +#else +static inline void xen_grant_setup_dma_ops(struct device *dev) +{ +} +#endif /* CONFIG_XEN_GRANT_DMA_OPS */ + #endif /* INCLUDE_XEN_OPS_H */ diff --git a/include/xen/xen.h b/include/xen/xen.h index a99bab8..fe6e6bb 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,6 +52,11 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +static inline int xen_has_restricted_virtio_memory_access(void) +{ + return IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain(); +} + #ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);