Message ID: 20211123143039.331929-5-ltykernel@gmail.com (mailing list archive)
State:      Superseded
Series:     x86/Hyper-V: Add Hyper-V Isolation VM support (Second part)
From: Tianyu Lan <ltykernel@gmail.com> Sent: Tuesday, November 23, 2021 6:31 AM
>
> Hyper-V Isolation VMs require bounce buffer support to copy
> data from/to encrypted memory, so enable swiotlb force mode
> to use the swiotlb bounce buffer for DMA transactions.
>
> In an Isolation VM with AMD SEV, the bounce buffer needs to be
> accessed via an extra address space above shared_gpa_boundary
> (e.g., the 39-bit address line) reported by the Hyper-V CPUID
> ISOLATION_CONFIG leaf. The physical address used for the access
> is the original physical address + shared_gpa_boundary. In the
> AMD SEV-SNP spec, shared_gpa_boundary is called the virtual top
> of memory (vTOM). Memory addresses below vTOM are automatically
> treated as private, while memory above vTOM is treated as shared.
>
> Hyper-V initializes the swiotlb bounce buffer, and the default
> swiotlb needs to be disabled. pci_swiotlb_detect_override() and
> pci_swiotlb_detect_4gb() enable the default one. To override
> that setting, hyperv_swiotlb_detect() needs to run before these
> detect functions, which depend on pci_xen_swiotlb_init(). Make
> pci_xen_swiotlb_init() depend on hyperv_swiotlb_detect() to keep
> the order.
>
> The swiotlb bounce buffer code calls set_memory_decrypted() to
> make the bounce buffer visible to the host, and maps it in the
> extra address space via memremap(). Populate the
> shared_gpa_boundary (vTOM) via the swiotlb_unencrypted_base
> variable.
>
> The map function memremap() can't work in the early
> hyperv_iommu_swiotlb_init(), so call
> swiotlb_update_mem_attributes() in hyperv_iommu_swiotlb_later_init().
>
> Add Hyper-V DMA ops and provide alloc/free and vmap/vunmap
> noncontiguous callbacks to handle requests to allocate and map
> noncontiguous DMA memory in VMBus device drivers. The netvsc
> driver will use this. Set the dma_ops_bypass flag for hv devices
> so that the DMA direct functions are used when mapping/unmapping
> DMA pages.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
> Change since v1:
>        * Remove hv isolation check in the sev_setup_arch()
>
>  arch/x86/mm/mem_encrypt.c      |   1 +
>  arch/x86/xen/pci-swiotlb-xen.c |   3 +-
>  drivers/hv/Kconfig             |   1 +
>  drivers/hv/vmbus_drv.c         |   6 ++
>  drivers/iommu/hyperv-iommu.c   | 164 +++++++++++++++++++++++++++++++++
>  include/linux/hyperv.h         |  10 ++
>  6 files changed, 184 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 35487305d8af..e48c73b3dd41 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -31,6 +31,7 @@
>  #include <asm/processor-flags.h>
>  #include <asm/msr.h>
>  #include <asm/cmdline.h>
> +#include <asm/mshyperv.h>

There is no longer any need to add this #include, since the code changes to
this file from a previous version of the patch are now gone.
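(As an illustration of the vTOM scheme described in the commit message: the
shared alias of a guest physical address is just the address plus
shared_gpa_boundary. A minimal sketch; the helper hv_shared_pa() is
hypothetical and not part of the patch, only ms_hyperv.shared_gpa_boundary
comes from the patch itself:

    /*
     * Illustration only: an address below vTOM is private (encrypted);
     * adding shared_gpa_boundary yields the alias above vTOM that the
     * host can access. The patch applies the same offset to PFNs in
     * hyperv_dma_vmap_noncontiguous().
     */
    static inline phys_addr_t hv_shared_pa(phys_addr_t pa)
    {
            return pa + ms_hyperv.shared_gpa_boundary;
    }
)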
>
>  #include "mm_internal.h"
>
> diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
> index 46df59aeaa06..30fd0600b008 100644
> --- a/arch/x86/xen/pci-swiotlb-xen.c
> +++ b/arch/x86/xen/pci-swiotlb-xen.c
> @@ -4,6 +4,7 @@
>
>  #include <linux/dma-map-ops.h>
>  #include <linux/pci.h>
> +#include <linux/hyperv.h>
>  #include <xen/swiotlb-xen.h>
>
>  #include <asm/xen/hypervisor.h>
> @@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
>  EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>
>  IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
> -                  NULL,
> +                  hyperv_swiotlb_detect,
>                    pci_xen_swiotlb_init,
>                    NULL);
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index dd12af20e467..d43b4cd88f57 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -9,6 +9,7 @@ config HYPERV
>  	select PARAVIRT
>  	select X86_HV_CALLBACK_VECTOR if X86
>  	select VMAP_PFN
> +	select DMA_OPS_BYPASS
>  	help
>  	  Select this option to run Linux as a Hyper-V client operating
>  	  system.
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 392c1ac4f819..32dc193e31cd 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -33,6 +33,7 @@
>  #include <linux/random.h>
>  #include <linux/kernel.h>
>  #include <linux/syscore_ops.h>
> +#include <linux/dma-map-ops.h>
>  #include <clocksource/hyperv_timer.h>
>  #include "hyperv_vmbus.h"
>
> @@ -2078,6 +2079,7 @@ struct hv_device *vmbus_device_create(const guid_t *type,
>  	return child_device_obj;
>  }
>
> +static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
>  /*
>   * vmbus_device_register - Register the child device
>   */
> @@ -2118,6 +2120,10 @@ int vmbus_device_register(struct hv_device *child_device_obj)
>  	}
>  	hv_debug_add_dev_dir(child_device_obj);
>
> +	child_device_obj->device.dma_ops_bypass = true;
> +	child_device_obj->device.dma_ops = &hyperv_iommu_dma_ops;
> +	child_device_obj->device.dma_mask = &vmbus_dma_mask;
> +	child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
>  	return 0;
>
>  err_kset_unregister:
> diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
> index e285a220c913..ebcb628e7e8f 100644
> --- a/drivers/iommu/hyperv-iommu.c
> +++ b/drivers/iommu/hyperv-iommu.c
> @@ -13,14 +13,21 @@
>  #include <linux/irq.h>
>  #include <linux/iommu.h>
>  #include <linux/module.h>
> +#include <linux/hyperv.h>
> +#include <linux/io.h>
>
>  #include <asm/apic.h>
>  #include <asm/cpu.h>
>  #include <asm/hw_irq.h>
>  #include <asm/io_apic.h>
> +#include <asm/iommu.h>
> +#include <asm/iommu_table.h>
>  #include <asm/irq_remapping.h>
>  #include <asm/hypervisor.h>
>  #include <asm/mshyperv.h>
> +#include <asm/swiotlb.h>
> +#include <linux/dma-map-ops.h>
> +#include <linux/dma-direct.h>
>
>  #include "irq_remapping.h"
>
> @@ -337,4 +344,161 @@ static const struct irq_domain_ops hyperv_root_ir_domain_ops = {
>  	.free = hyperv_root_irq_remapping_free,
>  };
>
> +static void __init hyperv_iommu_swiotlb_init(void)
> +{
> +	unsigned long hyperv_io_tlb_size;
> +	void *hyperv_io_tlb_start;
> +
> +	/*
> +	 * Allocate Hyper-V swiotlb bounce buffer at early place
> +	 * to reserve large contiguous memory.
> +	 */
> +	hyperv_io_tlb_size = swiotlb_size_or_default();
> +	hyperv_io_tlb_start = memblock_alloc(hyperv_io_tlb_size, PAGE_SIZE);
> +
> +	if (!hyperv_io_tlb_start)
> +		pr_warn("Fail to allocate Hyper-V swiotlb buffer.\n");
> +
> +	swiotlb_init_with_tbl(hyperv_io_tlb_start,
> +			      hyperv_io_tlb_size >> IO_TLB_SHIFT, true);
> +}
> +
> +int __init hyperv_swiotlb_detect(void)
> +{
> +	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
> +		return 0;
> +
> +	if (!hv_is_isolation_supported())
> +		return 0;
> +
> +	/*
> +	 * Enable swiotlb force mode in Isolation VM to
> +	 * use swiotlb bounce buffer for dma transaction.
> +	 */
> +	if (hv_isolation_type_snp())
> +		swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
> +	swiotlb_force = SWIOTLB_FORCE;
> +	return 1;
> +}
> +
> +static void __init hyperv_iommu_swiotlb_later_init(void)
> +{
> +	/*
> +	 * Swiotlb bounce buffer needs to be mapped in extra address
> +	 * space. Map function doesn't work in the early place and so
> +	 * call swiotlb_update_mem_attributes() here.
> +	 */
> +	swiotlb_update_mem_attributes();
> +}
> +
> +IOMMU_INIT_FINISH(hyperv_swiotlb_detect,
> +		  NULL, hyperv_iommu_swiotlb_init,
> +		  hyperv_iommu_swiotlb_later_init);
> +
> +static struct sg_table *hyperv_dma_alloc_noncontiguous(struct device *dev,
> +		size_t size, enum dma_data_direction dir, gfp_t gfp,
> +		unsigned long attrs)
> +{
> +	struct dma_sgt_handle *sh;
> +	struct page **pages;
> +	int num_pages = size >> PAGE_SHIFT;

This assumes "size" is a multiple of PAGE_SIZE. It should probably round up
for safety.

> +	void *vaddr, *ptr;
> +	int rc, i;
> +
> +	if (!hv_isolation_type_snp())
> +		return NULL;
> +
> +	sh = kmalloc(sizeof(*sh), gfp);
> +	if (!sh)
> +		return NULL;
> +
> +	vaddr = vmalloc(size);
> +	if (!vaddr)
> +		goto free_sgt;
> +
> +	pages = kvmalloc_array(num_pages, sizeof(struct page *),
> +			       GFP_KERNEL | __GFP_ZERO);
> +	if (!pages)
> +		goto free_mem;
> +
> +	for (i = 0, ptr = vaddr; i < num_pages; ++i, ptr += PAGE_SIZE)
> +		pages[i] = vmalloc_to_page(ptr);
> +
> +	rc = sg_alloc_table_from_pages(&sh->sgt, pages, num_pages, 0, size, GFP_KERNEL);
> +	if (rc)
> +		goto free_pages;
> +
> +	sh->sgt.sgl->dma_address = (dma_addr_t)vaddr;
> +	sh->sgt.sgl->dma_length = size;

include/linux/scatterlist.h defines the macros sg_dma_address() and
sg_dma_len() for accessing these two fields. It's probably best to use them.

> +	sh->pages = pages;
> +
> +	return &sh->sgt;
> +
> +free_pages:
> +	kvfree(pages);
> +free_mem:
> +	vfree(vaddr);
> +free_sgt:
> +	kfree(sh);
> +	return NULL;
> +}
> +
> +static void hyperv_dma_free_noncontiguous(struct device *dev, size_t size,
> +		struct sg_table *sgt, enum dma_data_direction dir)
> +{
> +	struct dma_sgt_handle *sh = sgt_handle(sgt);
> +
> +	if (!hv_isolation_type_snp())
> +		return;
> +
> +	vfree((void *)sh->sgt.sgl->dma_address);

Use sg_dma_address() here as well.

> +	sg_free_table(&sh->sgt);
> +	kvfree(sh->pages);
> +	kfree(sh);
> +}
> +
> +static void *hyperv_dma_vmap_noncontiguous(struct device *dev, size_t size,
> +		struct sg_table *sgt)
> +{
> +	int pg_count = size >> PAGE_SHIFT;

Round up here too, so as not to assume "size" is a multiple of PAGE_SIZE?
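For instance, a minimal sketch of the rounding suggested here and for
num_pages above (untested; assumes the standard PAGE_ALIGN() from
<linux/mm.h>):

    /* Round up so a size that is not page-aligned still covers the
     * final partial page. */
    int pg_count = PAGE_ALIGN(size) >> PAGE_SHIFT;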
> +	unsigned long *pfns;
> +	struct page **pages = sgt_handle(sgt)->pages;
> +	void *vaddr = NULL;
> +	int i;
> +
> +	if (!hv_isolation_type_snp())
> +		return NULL;
> +
> +	if (!pages)
> +		return NULL;
> +
> +	pfns = kcalloc(pg_count, sizeof(*pfns), GFP_KERNEL);
> +	if (!pfns)
> +		return NULL;
> +
> +	for (i = 0; i < pg_count; i++)
> +		pfns[i] = page_to_pfn(pages[i]) +
> +			(ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT);
> +
> +	vaddr = vmap_pfn(pfns, pg_count, PAGE_KERNEL);
> +	kfree(pfns);
> +	return vaddr;
> +
> +}
> +
> +static void hyperv_dma_vunmap_noncontiguous(struct device *dev, void *addr)
> +{
> +	if (!hv_isolation_type_snp())
> +		return;
> +	vunmap(addr);
> +}
> +
> +const struct dma_map_ops hyperv_iommu_dma_ops = {
> +	.alloc_noncontiguous = hyperv_dma_alloc_noncontiguous,
> +	.free_noncontiguous = hyperv_dma_free_noncontiguous,
> +	.vmap_noncontiguous = hyperv_dma_vmap_noncontiguous,
> +	.vunmap_noncontiguous = hyperv_dma_vunmap_noncontiguous,
> +};
> +EXPORT_SYMBOL_GPL(hyperv_iommu_dma_ops);
> +
>  #endif
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index b823311eac79..4d44fb3b3f1c 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1726,6 +1726,16 @@ int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
>  int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
>  				void (*block_invalidate)(void *context,
>  							 u64 block_mask));
> +#ifdef CONFIG_HYPERV
> +int __init hyperv_swiotlb_detect(void);
> +#else
> +static inline int __init hyperv_swiotlb_detect(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +extern const struct dma_map_ops hyperv_iommu_dma_ops;
>
>  struct hyperv_pci_block_ops {
>  	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
> --
> 2.25.1
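Putting the two sg_dma_*() comments above together, the assignments in
hyperv_dma_alloc_noncontiguous() and the vfree() in
hyperv_dma_free_noncontiguous() would then read roughly as follows (a
sketch only, not tested):

    sg_dma_address(sh->sgt.sgl) = (dma_addr_t)vaddr;
    sg_dma_len(sh->sgt.sgl) = size;

and on the free side:

    vfree((void *)sg_dma_address(sh->sgt.sgl));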