Message ID | 20181009132500.17643-17-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/33] powerpc: use mm zones more sensibly | expand |
On 10/10/2018 00:24, Christoph Hellwig wrote: > This code has been unused since it was merged and is in the way of > cleaning up the DMA code, thus remove it. > > This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support > for Nvlink NPUs"). This code is heavily used by the NVIDIA GPU driver. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > arch/powerpc/include/asm/pci.h | 3 - > arch/powerpc/include/asm/powernv.h | 23 - > arch/powerpc/platforms/powernv/Makefile | 2 +- > arch/powerpc/platforms/powernv/npu-dma.c | 999 ---------------------- > arch/powerpc/platforms/powernv/pci-ioda.c | 243 ------ > arch/powerpc/platforms/powernv/pci.h | 11 - > 6 files changed, 1 insertion(+), 1280 deletions(-) > delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c > > diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h > index 2af9ded80540..a01d2e3d6ff9 100644 > --- a/arch/powerpc/include/asm/pci.h > +++ b/arch/powerpc/include/asm/pci.h > @@ -127,7 +127,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose); > > #endif /* __KERNEL__ */ > > -extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); > -extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); > - > #endif /* __ASM_POWERPC_PCI_H */ > diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h > index 2f3ff7a27881..4848a6b3c6b2 100644 > --- a/arch/powerpc/include/asm/powernv.h > +++ b/arch/powerpc/include/asm/powernv.h > @@ -11,33 +11,10 @@ > #define _ASM_POWERNV_H > > #ifdef CONFIG_PPC_POWERNV > -#define NPU2_WRITE 1 > extern void powernv_set_nmmu_ptcr(unsigned long ptcr); > -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv); > -extern void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev); > -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t 
*ea, > - unsigned long *flags, unsigned long *status, > - int count); > - > void pnv_tm_init(void); > #else > static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } > -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - struct npu_context *(*cb)(struct npu_context *, void *), > - void *priv) { return ERR_PTR(-ENODEV); } > -static inline void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev) { } > - > -static inline int pnv_npu2_handle_fault(struct npu_context *context, > - uintptr_t *ea, unsigned long *flags, > - unsigned long *status, int count) { > - return -ENODEV; > -} > > static inline void pnv_tm_init(void) { } > static inline void pnv_power9_force_smt4(void) { } > diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile > index b540ce8eec55..2b13e9dd137c 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o > obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o > > obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o > -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o > +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o > obj-$(CONFIG_CXL_BASE) += pci-cxl.o > obj-$(CONFIG_EEH) += eeh-powernv.o > obj-$(CONFIG_PPC_SCOM) += opal-xscom.o > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > deleted file mode 100644 > index 8006c54a91e3..000000000000 > --- a/arch/powerpc/platforms/powernv/npu-dma.c > +++ /dev/null > @@ -1,999 +0,0 @@ > -/* > - * This file implements the DMA operations for NVLink devices. The NPU > - * devices all point to the same iommu table as the parent PCI device. > - * > - * Copyright Alistair Popple, IBM Corporation 2015. 
> - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of version 2 of the GNU General Public > - * License as published by the Free Software Foundation. > - */ > - > -#include <linux/slab.h> > -#include <linux/mmu_notifier.h> > -#include <linux/mmu_context.h> > -#include <linux/of.h> > -#include <linux/export.h> > -#include <linux/pci.h> > -#include <linux/memblock.h> > -#include <linux/iommu.h> > -#include <linux/debugfs.h> > - > -#include <asm/debugfs.h> > -#include <asm/tlb.h> > -#include <asm/powernv.h> > -#include <asm/reg.h> > -#include <asm/opal.h> > -#include <asm/io.h> > -#include <asm/iommu.h> > -#include <asm/pnv-pci.h> > -#include <asm/msi_bitmap.h> > -#include <asm/opal.h> > - > -#include "powernv.h" > -#include "pci.h" > - > -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu) > - > -/* > - * spinlock to protect initialisation of an npu_context for a particular > - * mm_struct. > - */ > -static DEFINE_SPINLOCK(npu_context_lock); > - > -/* > - * When an address shootdown range exceeds this threshold we invalidate the > - * entire TLB on the GPU for the given PID rather than each specific address in > - * the range. > - */ > -static uint64_t atsd_threshold = 2 * 1024 * 1024; > -static struct dentry *atsd_threshold_dentry; > - > -/* > - * Other types of TCE cache invalidation are not functional in the > - * hardware. > - */ > -static struct pci_dev *get_pci_dev(struct device_node *dn) > -{ > - struct pci_dn *pdn = PCI_DN(dn); > - > - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), > - pdn->busno, pdn->devfn); > -} > - > -/* Given a NPU device get the associated PCI device. 
*/ > -struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) > -{ > - struct device_node *dn; > - struct pci_dev *gpdev; > - > - if (WARN_ON(!npdev)) > - return NULL; > - > - if (WARN_ON(!npdev->dev.of_node)) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); > - if (!dn) > - return NULL; > - > - gpdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return gpdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_gpu_dev); > - > -/* Given the real PCI device get a linked NPU device. */ > -struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) > -{ > - struct device_node *dn; > - struct pci_dev *npdev; > - > - if (WARN_ON(!gpdev)) > - return NULL; > - > - /* Not all PCI devices have device-tree nodes */ > - if (!gpdev->dev.of_node) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); > - if (!dn) > - return NULL; > - > - npdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return npdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_npu_dev); > - > -#define NPU_DMA_OP_UNSUPPORTED() \ > - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ > - __func__) > - > -static void *dma_npu_alloc(struct device *dev, size_t size, > - dma_addr_t *dma_handle, gfp_t flag, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return NULL; > -} > - > -static void dma_npu_free(struct device *dev, size_t size, > - void *vaddr, dma_addr_t dma_handle, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > -} > - > -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, > - unsigned long offset, size_t size, > - enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, > - int nelems, enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); 
> - return 0; > -} > - > -static int dma_npu_dma_supported(struct device *dev, u64 mask) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static u64 dma_npu_get_required_mask(struct device *dev) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static const struct dma_map_ops dma_npu_ops = { > - .map_page = dma_npu_map_page, > - .map_sg = dma_npu_map_sg, > - .alloc = dma_npu_alloc, > - .free = dma_npu_free, > - .dma_supported = dma_npu_dma_supported, > - .get_required_mask = dma_npu_get_required_mask, > -}; > - > -/* > - * Returns the PE assoicated with the PCI device of the given > - * NPU. Returns the linked pci device if pci_dev != NULL. > - */ > -static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, > - struct pci_dev **gpdev) > -{ > - struct pnv_phb *phb; > - struct pci_controller *hose; > - struct pci_dev *pdev; > - struct pnv_ioda_pe *pe; > - struct pci_dn *pdn; > - > - pdev = pnv_pci_get_gpu_dev(npe->pdev); > - if (!pdev) > - return NULL; > - > - pdn = pci_get_pdn(pdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return NULL; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - pe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (gpdev) > - *gpdev = pdev; > - > - return pe; > -} > - > -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - const unsigned long size = tbl->it_indirect_levels ? 
> - tbl->it_level_size : tbl->it_size; > - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; > - const __u64 win_size = tbl->it_size << tbl->it_page_shift; > - > - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", > - start_addr, start_addr + win_size - 1, > - IOMMU_PAGE_SIZE(tbl)); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, > - npe->pe_number, > - npe->pe_number, > - tbl->it_indirect_levels + 1, > - __pa(tbl->it_base), > - size << 3, > - IOMMU_PAGE_SIZE(tbl)); > - if (rc) { > - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - /* Add the table to the list so its TCE cache will get invalidated */ > - pnv_pci_link_table_and_group(phb->hose->node, num, > - tbl, &npe->table_group); > - > - return 0; > -} > - > -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - pe_info(npe, "Removing DMA window\n"); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, > - npe->pe_number, > - 0/* levels */, 0/* table address */, > - 0/* table size */, 0/* page size */); > - if (rc) { > - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - pnv_pci_unlink_table_and_group(npe->table_group.tables[num], > - &npe->table_group); > - > - return 0; > -} > - > -/* > - * Enables 32 bit DMA on NPU. > - */ > -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) > -{ > - struct pci_dev *gpdev; > - struct pnv_ioda_pe *gpe; > - int64_t rc; > - > - /* > - * Find the assoicated PCI devices and get the dma window > - * information from there. 
> - */ > - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) > - return; > - > - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - if (!gpe) > - return; > - > - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); > - > - /* > - * We don't initialise npu_pe->tce32_table as we always use > - * dma_npu_ops which are nops. > - */ > - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); > -} > - > -/* > - * Enables bypass mode on the NPU. The NPU only supports one > - * window per link, so bypass needs to be explicitly enabled or > - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be > - * active at the same time. > - */ > -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc = 0; > - phys_addr_t top = memblock_end_of_DRAM(); > - > - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) > - return -EINVAL; > - > - rc = pnv_npu_unset_window(npe, 0); > - if (rc != OPAL_SUCCESS) > - return rc; > - > - /* Enable the bypass window */ > - > - top = roundup_pow_of_two(top); > - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", > - npe->pe_number); > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, top); > - > - if (rc == OPAL_SUCCESS) > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - return rc; > -} > - > -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) > -{ > - int i; > - struct pnv_phb *phb; > - struct pci_dn *pdn; > - struct pnv_ioda_pe *npe; > - struct pci_dev *npdev; > - > - for (i = 0; ; ++i) { > - npdev = pnv_pci_get_npu_dev(gpdev, i); > - > - if (!npdev) > - break; > - > - pdn = pci_get_pdn(npdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return; > - > - phb = pci_bus_to_host(npdev->bus)->private_data; > - > - /* We only do bypass if it's enabled on the linked device */ > - npe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (bypass) { > - dev_info(&npdev->dev, > - "Using 
64-bit DMA iommu bypass\n"); > - pnv_npu_dma_set_bypass(npe); > - } else { > - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); > - pnv_npu_dma_set_32(npe); > - } > - } > -} > - > -/* Switch ownership from platform code to external user (e.g. VFIO) */ > -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - /* > - * Note: NPU has just a single TVE in the hardware which means that > - * while used by the kernel, it can have either 32bit window or > - * DMA bypass but never both. So we deconfigure 32bit window only > - * if it was enabled at the moment of ownership change. > - */ > - if (npe->table_group.tables[0]) { > - pnv_npu_unset_window(npe, 0); > - return; > - } > - > - /* Disable bypass */ > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, 0); > - if (rc) { > - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); > - return; > - } > - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); > -} > - > -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - struct pci_bus *pbus = phb->hose->bus; > - struct pci_dev *npdev, *gpdev = NULL, *gptmp; > - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - > - if (!gpe || !gpdev) > - return NULL; > - > - list_for_each_entry(npdev, &pbus->devices, bus_list) { > - gptmp = pnv_pci_get_gpu_dev(npdev); > - > - if (gptmp != gpdev) > - continue; > - > - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); > - iommu_group_add_device(gpe->table_group.group, &npdev->dev); > - } > - > - return gpe; > -} > - > -/* Maximum number of nvlinks per npu */ > -#define NV_MAX_LINKS 6 > - > -/* Maximum index of npu2 hosts in the system. 
Always < NV_MAX_NPUS */ > -static int max_npu2_index; > - > -struct npu_context { > - struct mm_struct *mm; > - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; > - struct mmu_notifier mn; > - struct kref kref; > - bool nmmu_flush; > - > - /* Callback to stop translation requests on a given GPU */ > - void (*release_cb)(struct npu_context *context, void *priv); > - > - /* > - * Private pointer passed to the above callback for usage by > - * device drivers. > - */ > - void *priv; > -}; > - > -struct mmio_atsd_reg { > - struct npu *npu; > - int reg; > -}; > - > -/* > - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC > - * if none are available. > - */ > -static int get_mmio_atsd_reg(struct npu *npu) > -{ > - int i; > - > - for (i = 0; i < npu->mmio_atsd_count; i++) { > - if (!test_bit(i, &npu->mmio_atsd_usage)) > - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) > - return i; > - } > - > - return -ENOSPC; > -} > - > -static void put_mmio_atsd_reg(struct npu *npu, int reg) > -{ > - clear_bit_unlock(reg, &npu->mmio_atsd_usage); > -} > - > -/* MMIO ATSD register offsets */ > -#define XTS_ATSD_AVA 1 > -#define XTS_ATSD_STAT 2 > - > -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, > - unsigned long launch, unsigned long va) > -{ > - struct npu *npu = mmio_atsd_reg->npu; > - int reg = mmio_atsd_reg->reg; > - > - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); > - eieio(); > - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); > -} > - > -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate matching PID */ > - launch = PPC_BIT(12); > - > - /* PRS set to process-scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << 
PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - /* Invalidating the entire process doesn't use a va */ > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); > - } > -} > - > -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long va, unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate target VA */ > - launch = 0; > - > - /* PRS set to process scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); > - } > -} > - > -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) > - > -static void mmio_invalidate_wait( > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - struct npu *npu; > - int i, reg; > - > - /* Wait for all invalidations to complete */ > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* Wait for completion */ > - npu = mmio_atsd_reg[i].npu; > - reg = mmio_atsd_reg[i].reg; > - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) > - cpu_relax(); > - } > -} > - > -/* > - * Acquires all the address translation shootdown (ATSD) registers required to > - * launch an ATSD on all links this npu_context is active on. 
> - */ > -static void acquire_atsd_reg(struct npu_context *npu_context, > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i, j; > - struct npu *npu; > - struct pci_dev *npdev; > - struct pnv_phb *nphb; > - > - for (i = 0; i <= max_npu2_index; i++) { > - mmio_atsd_reg[i].reg = -1; > - for (j = 0; j < NV_MAX_LINKS; j++) { > - /* > - * There are no ordering requirements with respect to > - * the setup of struct npu_context, but to ensure > - * consistent behaviour we need to ensure npdev[][] is > - * only read once. > - */ > - npdev = READ_ONCE(npu_context->npdev[i][j]); > - if (!npdev) > - continue; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - mmio_atsd_reg[i].npu = npu; > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - while (mmio_atsd_reg[i].reg < 0) { > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - cpu_relax(); > - } > - break; > - } > - } > -} > - > -/* > - * Release previously acquired ATSD registers. To avoid deadlocks the registers > - * must be released in the same order they were acquired above in > - * acquire_atsd_reg. > - */ > -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i; > - > - for (i = 0; i <= max_npu2_index; i++) { > - /* > - * We can't rely on npu_context->npdev[][] being the same here > - * as when acquire_atsd_reg() was called, hence we use the > - * values stored in mmio_atsd_reg during the acquire phase > - * rather than re-reading npdev[][]. > - */ > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); > - } > -} > - > -/* > - * Invalidate either a single address or an entire PID depending on > - * the value of va. 
> - */ > -static void mmio_invalidate(struct npu_context *npu_context, int va, > - unsigned long address, bool flush) > -{ > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; > - unsigned long pid = npu_context->mm->context.id; > - > - if (npu_context->nmmu_flush) > - /* > - * Unfortunately the nest mmu does not support flushing specific > - * addresses so we have to flush the whole mm once before > - * shooting down the GPU translation. > - */ > - flush_all_mm(npu_context->mm); > - > - /* > - * Loop over all the NPUs this process is active on and launch > - * an invalidate. > - */ > - acquire_atsd_reg(npu_context, mmio_atsd_reg); > - if (va) > - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); > - else > - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); > - > - mmio_invalidate_wait(mmio_atsd_reg); > - if (flush) { > - /* > - * The GPU requires two flush ATSDs to ensure all entries have > - * been flushed. We use PID 0 as it will never be used for a > - * process on the GPU. > - */ > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - } > - release_atsd_reg(mmio_atsd_reg); > -} > - > -static void pnv_npu2_mn_release(struct mmu_notifier *mn, > - struct mm_struct *mm) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - /* Call into device driver to stop requests to the NMMU */ > - if (npu_context->release_cb) > - npu_context->release_cb(npu_context, npu_context->priv); > - > - /* > - * There should be no more translation requests for this PID, but we > - * need to ensure any entries for it are removed from the TLB. 
> - */ > - mmio_invalidate(npu_context, 0, 0, true); > -} > - > -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long address, > - pte_t pte) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - mmio_invalidate(npu_context, 1, address, true); > -} > - > -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long start, unsigned long end) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - unsigned long address; > - > - if (end - start > atsd_threshold) { > - /* > - * Just invalidate the entire PID if the address range is too > - * large. > - */ > - mmio_invalidate(npu_context, 0, 0, true); > - } else { > - for (address = start; address < end; address += PAGE_SIZE) > - mmio_invalidate(npu_context, 1, address, false); > - > - /* Do the flush only on the final addess == end */ > - mmio_invalidate(npu_context, 1, address, true); > - } > -} > - > -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { > - .release = pnv_npu2_mn_release, > - .change_pte = pnv_npu2_mn_change_pte, > - .invalidate_range = pnv_npu2_mn_invalidate_range, > -}; > - > -/* > - * Call into OPAL to setup the nmmu context for the current task in > - * the NPU. This must be called to setup the context tables before the > - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. > - * > - * A release callback should be registered to allow a device driver to > - * be notified that it should not launch any new translation requests > - * as the final TLB invalidate is about to occur. > - * > - * Returns an error if there no contexts are currently available or a > - * npu_context which should be passed to pnv_npu2_handle_fault(). > - * > - * mmap_sem must be held in write mode and must not be called from interrupt > - * context. 
> - */ > -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv) > -{ > - int rc; > - u32 nvlink_index; > - struct device_node *nvlink_dn; > - struct mm_struct *mm = current->mm; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct npu_context *npu_context; > - > - /* > - * At present we don't support GPUs connected to multiple NPUs and I'm > - * not sure the hardware does either. > - */ > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return ERR_PTR(-ENODEV); > - > - if (!npdev) > - /* No nvlink associated with this GPU device */ > - return ERR_PTR(-ENODEV); > - > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return ERR_PTR(-ENODEV); > - > - if (!mm || mm->context.id == 0) { > - /* > - * Kernel thread contexts are not supported and context id 0 is > - * reserved on the GPU. > - */ > - return ERR_PTR(-EINVAL); > - } > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - > - /* > - * Setup the NPU context table for a particular GPU. These need to be > - * per-GPU as we need the tables to filter ATSDs when there are no > - * active contexts on a particular GPU. It is safe for these to be > - * called concurrently with destroy as the OPAL call takes appropriate > - * locks and refcounts on init/destroy. > - */ > - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - if (rc < 0) > - return ERR_PTR(-ENOSPC); > - > - /* > - * We store the npu pci device so we can more easily get at the > - * associated npus. 
> - */ > - spin_lock(&npu_context_lock); > - npu_context = mm->context.npu_context; > - if (npu_context) { > - if (npu_context->release_cb != cb || > - npu_context->priv != priv) { > - spin_unlock(&npu_context_lock); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(-EINVAL); > - } > - > - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); > - } > - spin_unlock(&npu_context_lock); > - > - if (!npu_context) { > - /* > - * We can set up these fields without holding the > - * npu_context_lock as the npu_context hasn't been returned to > - * the caller meaning it can't be destroyed. Parallel allocation > - * is protected against by mmap_sem. > - */ > - rc = -ENOMEM; > - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); > - if (npu_context) { > - kref_init(&npu_context->kref); > - npu_context->mm = mm; > - npu_context->mn.ops = &nv_nmmu_notifier_ops; > - rc = __mmu_notifier_register(&npu_context->mn, mm); > - } > - > - if (rc) { > - kfree(npu_context); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(rc); > - } > - > - mm->context.npu_context = npu_context; > - } > - > - npu_context->release_cb = cb; > - npu_context->priv = priv; > - > - /* > - * npdev is a pci_dev pointer setup by the PCI code. We assign it to > - * npdev[][] to indicate to the mmu notifiers that an invalidation > - * should also be sent over this nvlink. The notifiers don't use any > - * other fields in npu_context, so we just need to ensure that when they > - * deference npu_context->npdev[][] it is either a valid pointer or > - * NULL. 
> - */ > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); > - > - if (!nphb->npu.nmmu_flush) { > - /* > - * If we're not explicitly flushing ourselves we need to mark > - * the thread for global flushes > - */ > - npu_context->nmmu_flush = false; > - mm_context_add_copro(mm); > - } else > - npu_context->nmmu_flush = true; > - > - return npu_context; > -} > -EXPORT_SYMBOL(pnv_npu2_init_context); > - > -static void pnv_npu2_release_context(struct kref *kref) > -{ > - struct npu_context *npu_context = > - container_of(kref, struct npu_context, kref); > - > - if (!npu_context->nmmu_flush) > - mm_context_remove_copro(npu_context->mm); > - > - npu_context->mm->context.npu_context = NULL; > -} > - > -/* > - * Destroy a context on the given GPU. May free the npu_context if it is no > - * longer active on any GPUs. Must not be called from interrupt context. > - */ > -void pnv_npu2_destroy_context(struct npu_context *npu_context, > - struct pci_dev *gpdev) > -{ > - int removed; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - struct device_node *nvlink_dn; > - u32 nvlink_index; > - > - if (WARN_ON(!npdev)) > - return; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return; > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); > - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - spin_lock(&npu_context_lock); > - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); > - spin_unlock(&npu_context_lock); > - > - /* > - * We need to do this outside of pnv_npu2_release_context so that it is > - * outside the spinlock as mmu_notifier_destroy uses 
SRCU. > - */ > - if (removed) { > - mmu_notifier_unregister(&npu_context->mn, > - npu_context->mm); > - > - kfree(npu_context); > - } > - > -} > -EXPORT_SYMBOL(pnv_npu2_destroy_context); > - > -/* > - * Assumes mmap_sem is held for the contexts associated mm. > - */ > -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, > - unsigned long *flags, unsigned long *status, int count) > -{ > - u64 rc = 0, result = 0; > - int i, is_write; > - struct page *page[1]; > - > - /* mmap_sem should be held so the struct_mm must be present */ > - struct mm_struct *mm = context->mm; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return -ENODEV; > - > - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); > - > - for (i = 0; i < count; i++) { > - is_write = flags[i] & NPU2_WRITE; > - rc = get_user_pages_remote(NULL, mm, ea[i], 1, > - is_write ? FOLL_WRITE : 0, > - page, NULL, NULL); > - > - /* > - * To support virtualised environments we will have to do an > - * access to the page to ensure it gets faulted into the > - * hypervisor. For the moment virtualisation is not supported in > - * other areas so leave the access out. 
> - */ > - if (rc != 1) { > - status[i] = rc; > - result = -EFAULT; > - continue; > - } > - > - status[i] = 0; > - put_page(page[0]); > - } > - > - return result; > -} > -EXPORT_SYMBOL(pnv_npu2_handle_fault); > - > -int pnv_npu2_init(struct pnv_phb *phb) > -{ > - unsigned int i; > - u64 mmio_atsd; > - struct device_node *dn; > - struct pci_dev *gpdev; > - static int npu_index; > - uint64_t rc = 0; > - > - if (!atsd_threshold_dentry) { > - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", > - 0600, powerpc_debugfs_root, &atsd_threshold); > - } > - > - phb->npu.nmmu_flush = > - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); > - for_each_child_of_node(phb->hose->dn, dn) { > - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); > - if (gpdev) { > - rc = opal_npu_map_lpar(phb->opal_id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn), > - 0, 0); > - if (rc) > - dev_err(&gpdev->dev, > - "Error %lld mapping device to LPAR\n", > - rc); > - } > - } > - > - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd", > - i, &mmio_atsd); i++) > - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); > - > - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i); > - phb->npu.mmio_atsd_count = i; > - phb->npu.mmio_atsd_usage = 0; > - npu_index++; > - if (WARN_ON(npu_index >= NV_MAX_NPUS)) > - return -ENOSPC; > - max_npu2_index = npu_index; > - phb->npu.index = npu_index; > - > - return 0; > -} > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 913175ba1c10..b6db65917bb4 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -1203,75 +1203,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > return pe; > } > > -static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) > -{ > - int pe_num, found_pe = false, rc; > - long rid; > - struct pnv_ioda_pe *pe; > - struct pci_dev *gpu_pdev; 
> - struct pci_dn *npu_pdn; > - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); > - struct pnv_phb *phb = hose->private_data; > - > - /* > - * Due to a hardware errata PE#0 on the NPU is reserved for > - * error handling. This means we only have three PEs remaining > - * which need to be assigned to four links, implying some > - * links must share PEs. > - * > - * To achieve this we assign PEs such that NPUs linking the > - * same GPU get assigned the same PE. > - */ > - gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); > - for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { > - pe = &phb->ioda.pe_array[pe_num]; > - if (!pe->pdev) > - continue; > - > - if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { > - /* > - * This device has the same peer GPU so should > - * be assigned the same PE as the existing > - * peer NPU. > - */ > - dev_info(&npu_pdev->dev, > - "Associating to existing PE %x\n", pe_num); > - pci_dev_get(npu_pdev); > - npu_pdn = pci_get_pdn(npu_pdev); > - rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; > - npu_pdn->pe_number = pe_num; > - phb->ioda.pe_rmap[rid] = pe->pe_number; > - > - /* Map the PE to this link */ > - rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, > - OpalPciBusAll, > - OPAL_COMPARE_RID_DEVICE_NUMBER, > - OPAL_COMPARE_RID_FUNCTION_NUMBER, > - OPAL_MAP_PE); > - WARN_ON(rc != OPAL_SUCCESS); > - found_pe = true; > - break; > - } > - } > - > - if (!found_pe) > - /* > - * Could not find an existing PE so allocate a new > - * one. 
> - */ > - return pnv_ioda_setup_dev_PE(npu_pdev); > - else > - return pe; > -} > - > -static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) > -{ > - struct pci_dev *pdev; > - > - list_for_each_entry(pdev, &bus->devices, bus_list) > - pnv_ioda_setup_npu_PE(pdev); > -} > - > static void pnv_pci_ioda_setup_PEs(void) > { > struct pci_controller *hose, *tmp; > @@ -1281,13 +1212,6 @@ static void pnv_pci_ioda_setup_PEs(void) > > list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > phb = hose->private_data; > - if (phb->type == PNV_PHB_NPU_NVLINK) { > - /* PE#0 is needed for error reporting */ > - pnv_ioda_reserve_pe(phb, 0); > - pnv_ioda_setup_npu_PEs(hose->bus); > - if (phb->model == PNV_PHB_MODEL_NPU2) > - pnv_npu2_init(phb); > - } > if (phb->type == PNV_PHB_NPU_OCAPI) { > bus = hose->bus; > list_for_each_entry(pdev, &bus->devices, bus_list) > @@ -1871,9 +1795,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) > } > *pdev->dev.dma_mask = dma_mask; > > - /* Update peer npu devices */ > - pnv_npu_try_dma_set_bypass(pdev, bypass); > - > return 0; > } > > @@ -2119,14 +2040,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, > } > } > > -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) > -{ > - if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) > - pnv_pci_phb3_tce_invalidate_entire(phb, rm); > - else > - opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); > -} > - > static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, > long npages, unsigned long uaddr, > enum dma_data_direction direction, > @@ -2615,137 +2528,6 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { > .take_ownership = pnv_ioda2_take_ownership, > .release_ownership = pnv_ioda2_release_ownership, > }; > - > -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) > -{ > - struct pci_controller *hose; > - struct pnv_phb *phb; > - struct pnv_ioda_pe **ptmppe = 
opaque; > - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); > - struct pci_dn *pdn = pci_get_pdn(pdev); > - > - if (!pdn || pdn->pe_number == IODA_INVALID_PE) > - return 0; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - if (phb->type != PNV_PHB_NPU_NVLINK) > - return 0; > - > - *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; > - > - return 1; > -} > - > -/* > - * This returns PE of associated NPU. > - * This assumes that NPU is in the same IOMMU group with GPU and there is > - * no other PEs. > - */ > -static struct pnv_ioda_pe *gpe_table_group_to_npe( > - struct iommu_table_group *table_group) > -{ > - struct pnv_ioda_pe *npe = NULL; > - int ret = iommu_group_for_each_dev(table_group->group, &npe, > - gpe_table_group_to_npe_cb); > - > - BUG_ON(!ret || !npe); > - > - return npe; > -} > - > -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, > - int num, struct iommu_table *tbl) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 1 : 0; > - long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); > - > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - pnv_npu_unset_window(npe, num2); > - > - ret = pnv_npu_set_window(npe, num, tbl); > - if (ret) { > - pnv_pci_ioda2_unset_window(table_group, num); > - if (table_group->tables[num2]) > - pnv_npu_set_window(npe, num2, > - table_group->tables[num2]); > - } > - > - return ret; > -} > - > -static long pnv_pci_ioda2_npu_unset_window( > - struct iommu_table_group *table_group, > - int num) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 
1 : 0; > - long ret = pnv_pci_ioda2_unset_window(table_group, num); > - > - if (ret) > - return ret; > - > - if (!npe->table_group.tables[num]) > - return 0; > - > - ret = pnv_npu_unset_window(npe, num); > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); > - > - return ret; > -} > - > -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) > -{ > - /* > - * Detach NPU first as pnv_ioda2_take_ownership() will destroy > - * the iommu_table if 32bit DMA is enabled. > - */ > - pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); > - pnv_ioda2_take_ownership(table_group); > -} > - > -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { > - .get_table_size = pnv_pci_ioda2_get_table_size, > - .create_table = pnv_pci_ioda2_create_table_userspace, > - .set_window = pnv_pci_ioda2_npu_set_window, > - .unset_window = pnv_pci_ioda2_npu_unset_window, > - .take_ownership = pnv_ioda2_npu_take_ownership, > - .release_ownership = pnv_ioda2_release_ownership, > -}; > - > -static void pnv_pci_ioda_setup_iommu_api(void) > -{ > - struct pci_controller *hose, *tmp; > - struct pnv_phb *phb; > - struct pnv_ioda_pe *pe, *gpe; > - > - /* > - * Now we have all PHBs discovered, time to add NPU devices to > - * the corresponding IOMMU groups. 
> - */ > - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > - phb = hose->private_data; > - > - if (phb->type != PNV_PHB_NPU_NVLINK) > - continue; > - > - list_for_each_entry(pe, &phb->ioda.pe_list, list) { > - gpe = pnv_pci_npu_setup_iommu(pe); > - if (gpe) > - gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; > - } > - } > -} > -#else /* !CONFIG_IOMMU_API */ > -static void pnv_pci_ioda_setup_iommu_api(void) { }; > #endif > > static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) > @@ -3242,7 +3024,6 @@ static void pnv_pci_enable_bridges(void) > static void pnv_pci_ioda_fixup(void) > { > pnv_pci_ioda_setup_PEs(); > - pnv_pci_ioda_setup_iommu_api(); > pnv_pci_ioda_create_dbgfs(); > > pnv_pci_enable_bridges(); > @@ -3689,27 +3470,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { > .shutdown = pnv_pci_ioda_shutdown, > }; > > -static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) > -{ > - dev_err_once(&npdev->dev, > - "%s operation unsupported for NVLink devices\n", > - __func__); > - return -EPERM; > -} > - > -static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { > - .dma_dev_setup = pnv_pci_dma_dev_setup, > -#ifdef CONFIG_PCI_MSI > - .setup_msi_irqs = pnv_setup_msi_irqs, > - .teardown_msi_irqs = pnv_teardown_msi_irqs, > -#endif > - .enable_device_hook = pnv_pci_enable_device_hook, > - .window_alignment = pnv_pci_window_alignment, > - .reset_secondary_bus = pnv_pci_reset_secondary_bus, > - .dma_set_mask = pnv_npu_dma_set_mask, > - .shutdown = pnv_pci_ioda_shutdown, > -}; > - > static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { > .enable_device_hook = pnv_pci_enable_device_hook, > .window_alignment = pnv_pci_window_alignment, > @@ -3931,9 +3691,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, > ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; > > switch (phb->type) { > - case PNV_PHB_NPU_NVLINK: > - hose->controller_ops = 
pnv_npu_ioda_controller_ops; > - break; > case PNV_PHB_NPU_OCAPI: > hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; > break; > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 8b37b28e3831..54f2935b7ac5 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -231,17 +231,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, > #define pe_info(pe, fmt, ...) \ > pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) > > -/* Nvlink functions */ > -extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); > -extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); > -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); > -extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl); > -extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); > -extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); > -extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); > -extern int pnv_npu2_init(struct pnv_phb *phb); > - > /* pci-ioda-tce.c */ > #define POWERNV_IOMMU_DEFAULT_LEVELS 1 > #define POWERNV_IOMMU_MAX_LEVELS 5 >
On Mon, 2018-10-15 at 12:34 +1100, Alexey Kardashevskiy wrote: > On 10/10/2018 00:24, Christoph Hellwig wrote: > > This code has been unused since it was merged and is in the way of > > cleaning up the DMA code, thus remove it. > > > > This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support > > for Nvlink NPUs"). > > > This code is heavily used by the NVIDIA GPU driver. Some of it is, yes. And while I don't want to be involved in the discussion about that specific can of worms, there is code in this file related to the custom "always error" DMA ops that I suppose we could remove, which is what is getting in the way of Christoph's cleanups. It's just meant as debug stuff to catch incorrect attempts at doing the DMA mappings on the wrong "side" of the GPU. Cheers, Ben.
On Mon, Oct 15, 2018 at 12:34:02PM +1100, Alexey Kardashevskiy wrote: > > On 10/10/2018 00:24, Christoph Hellwig wrote: > > This code has been unused since it was merged and is in the way of > > cleaning up the DMA code, thus remove it. > > > > This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support > > for Nvlink NPUs"). > > > This code is heavily used by the NVIDIA GPU driver. Not by anything that actually exists in the kernel tree, so it simply doesn't matter.
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 2af9ded80540..a01d2e3d6ff9 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -127,7 +127,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose); #endif /* __KERNEL__ */ -extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); -extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); - #endif /* __ASM_POWERPC_PCI_H */ diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index 2f3ff7a27881..4848a6b3c6b2 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -11,33 +11,10 @@ #define _ASM_POWERNV_H #ifdef CONFIG_PPC_POWERNV -#define NPU2_WRITE 1 extern void powernv_set_nmmu_ptcr(unsigned long ptcr); -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - void (*cb)(struct npu_context *, void *), - void *priv); -extern void pnv_npu2_destroy_context(struct npu_context *context, - struct pci_dev *gpdev); -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, - unsigned long *flags, unsigned long *status, - int count); - void pnv_tm_init(void); #else static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - struct npu_context *(*cb)(struct npu_context *, void *), - void *priv) { return ERR_PTR(-ENODEV); } -static inline void pnv_npu2_destroy_context(struct npu_context *context, - struct pci_dev *gpdev) { } - -static inline int pnv_npu2_handle_fault(struct npu_context *context, - uintptr_t *ea, unsigned long *flags, - unsigned long *status, int count) { - return -ENODEV; -} static inline void pnv_tm_init(void) { } static inline void pnv_power9_force_smt4(void) { } diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 
b540ce8eec55..2b13e9dd137c 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c deleted file mode 100644 index 8006c54a91e3..000000000000 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ /dev/null @@ -1,999 +0,0 @@ -/* - * This file implements the DMA operations for NVLink devices. The NPU - * devices all point to the same iommu table as the parent PCI device. - * - * Copyright Alistair Popple, IBM Corporation 2015. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ - -#include <linux/slab.h> -#include <linux/mmu_notifier.h> -#include <linux/mmu_context.h> -#include <linux/of.h> -#include <linux/export.h> -#include <linux/pci.h> -#include <linux/memblock.h> -#include <linux/iommu.h> -#include <linux/debugfs.h> - -#include <asm/debugfs.h> -#include <asm/tlb.h> -#include <asm/powernv.h> -#include <asm/reg.h> -#include <asm/opal.h> -#include <asm/io.h> -#include <asm/iommu.h> -#include <asm/pnv-pci.h> -#include <asm/msi_bitmap.h> -#include <asm/opal.h> - -#include "powernv.h" -#include "pci.h" - -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu) - -/* - * spinlock to protect initialisation of an npu_context for a particular - * mm_struct. 
- */ -static DEFINE_SPINLOCK(npu_context_lock); - -/* - * When an address shootdown range exceeds this threshold we invalidate the - * entire TLB on the GPU for the given PID rather than each specific address in - * the range. - */ -static uint64_t atsd_threshold = 2 * 1024 * 1024; -static struct dentry *atsd_threshold_dentry; - -/* - * Other types of TCE cache invalidation are not functional in the - * hardware. - */ -static struct pci_dev *get_pci_dev(struct device_node *dn) -{ - struct pci_dn *pdn = PCI_DN(dn); - - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), - pdn->busno, pdn->devfn); -} - -/* Given a NPU device get the associated PCI device. */ -struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) -{ - struct device_node *dn; - struct pci_dev *gpdev; - - if (WARN_ON(!npdev)) - return NULL; - - if (WARN_ON(!npdev->dev.of_node)) - return NULL; - - /* Get assoicated PCI device */ - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); - if (!dn) - return NULL; - - gpdev = get_pci_dev(dn); - of_node_put(dn); - - return gpdev; -} -EXPORT_SYMBOL(pnv_pci_get_gpu_dev); - -/* Given the real PCI device get a linked NPU device. 
*/ -struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) -{ - struct device_node *dn; - struct pci_dev *npdev; - - if (WARN_ON(!gpdev)) - return NULL; - - /* Not all PCI devices have device-tree nodes */ - if (!gpdev->dev.of_node) - return NULL; - - /* Get assoicated PCI device */ - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); - if (!dn) - return NULL; - - npdev = get_pci_dev(dn); - of_node_put(dn); - - return npdev; -} -EXPORT_SYMBOL(pnv_pci_get_npu_dev); - -#define NPU_DMA_OP_UNSUPPORTED() \ - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ - __func__) - -static void *dma_npu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return NULL; -} - -static void dma_npu_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); -} - -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_dma_supported(struct device *dev, u64 mask) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static u64 dma_npu_get_required_mask(struct device *dev) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static const struct dma_map_ops dma_npu_ops = { - .map_page = dma_npu_map_page, - .map_sg = dma_npu_map_sg, - .alloc = dma_npu_alloc, - .free = dma_npu_free, - .dma_supported = dma_npu_dma_supported, - .get_required_mask = dma_npu_get_required_mask, -}; - -/* - * Returns the PE assoicated with the PCI device of the given - * NPU. Returns the linked pci device if pci_dev != NULL. 
- */ -static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, - struct pci_dev **gpdev) -{ - struct pnv_phb *phb; - struct pci_controller *hose; - struct pci_dev *pdev; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - pdev = pnv_pci_get_gpu_dev(npe->pdev); - if (!pdev) - return NULL; - - pdn = pci_get_pdn(pdev); - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return NULL; - - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pe = &phb->ioda.pe_array[pdn->pe_number]; - - if (gpdev) - *gpdev = pdev; - - return pe; -} - -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, - struct iommu_table *tbl) -{ - struct pnv_phb *phb = npe->phb; - int64_t rc; - const unsigned long size = tbl->it_indirect_levels ? - tbl->it_level_size : tbl->it_size; - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; - const __u64 win_size = tbl->it_size << tbl->it_page_shift; - - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", - start_addr, start_addr + win_size - 1, - IOMMU_PAGE_SIZE(tbl)); - - rc = opal_pci_map_pe_dma_window(phb->opal_id, - npe->pe_number, - npe->pe_number, - tbl->it_indirect_levels + 1, - __pa(tbl->it_base), - size << 3, - IOMMU_PAGE_SIZE(tbl)); - if (rc) { - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); - return rc; - } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); - - /* Add the table to the list so its TCE cache will get invalidated */ - pnv_pci_link_table_and_group(phb->hose->node, num, - tbl, &npe->table_group); - - return 0; -} - -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) -{ - struct pnv_phb *phb = npe->phb; - int64_t rc; - - pe_info(npe, "Removing DMA window\n"); - - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, - npe->pe_number, - 0/* levels */, 0/* table address */, - 0/* table size */, 0/* page size */); - if (rc) { - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); - return rc; - } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); - - 
pnv_pci_unlink_table_and_group(npe->table_group.tables[num], - &npe->table_group); - - return 0; -} - -/* - * Enables 32 bit DMA on NPU. - */ -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) -{ - struct pci_dev *gpdev; - struct pnv_ioda_pe *gpe; - int64_t rc; - - /* - * Find the assoicated PCI devices and get the dma window - * information from there. - */ - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) - return; - - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); - if (!gpe) - return; - - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); - - /* - * We don't initialise npu_pe->tce32_table as we always use - * dma_npu_ops which are nops. - */ - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); -} - -/* - * Enables bypass mode on the NPU. The NPU only supports one - * window per link, so bypass needs to be explicitly enabled or - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be - * active at the same time. - */ -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) -{ - struct pnv_phb *phb = npe->phb; - int64_t rc = 0; - phys_addr_t top = memblock_end_of_DRAM(); - - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) - return -EINVAL; - - rc = pnv_npu_unset_window(npe, 0); - if (rc != OPAL_SUCCESS) - return rc; - - /* Enable the bypass window */ - - top = roundup_pow_of_two(top); - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", - npe->pe_number); - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, - npe->pe_number, npe->pe_number, - 0 /* bypass base */, top); - - if (rc == OPAL_SUCCESS) - pnv_pci_ioda2_tce_invalidate_entire(phb, false); - - return rc; -} - -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) -{ - int i; - struct pnv_phb *phb; - struct pci_dn *pdn; - struct pnv_ioda_pe *npe; - struct pci_dev *npdev; - - for (i = 0; ; ++i) { - npdev = pnv_pci_get_npu_dev(gpdev, i); - - if (!npdev) - break; - - pdn = pci_get_pdn(npdev); - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return; - - 
phb = pci_bus_to_host(npdev->bus)->private_data; - - /* We only do bypass if it's enabled on the linked device */ - npe = &phb->ioda.pe_array[pdn->pe_number]; - - if (bypass) { - dev_info(&npdev->dev, - "Using 64-bit DMA iommu bypass\n"); - pnv_npu_dma_set_bypass(npe); - } else { - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); - pnv_npu_dma_set_32(npe); - } - } -} - -/* Switch ownership from platform code to external user (e.g. VFIO) */ -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) -{ - struct pnv_phb *phb = npe->phb; - int64_t rc; - - /* - * Note: NPU has just a single TVE in the hardware which means that - * while used by the kernel, it can have either 32bit window or - * DMA bypass but never both. So we deconfigure 32bit window only - * if it was enabled at the moment of ownership change. - */ - if (npe->table_group.tables[0]) { - pnv_npu_unset_window(npe, 0); - return; - } - - /* Disable bypass */ - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, - npe->pe_number, npe->pe_number, - 0 /* bypass base */, 0); - if (rc) { - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); - return; - } - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); -} - -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) -{ - struct pnv_phb *phb = npe->phb; - struct pci_bus *pbus = phb->hose->bus; - struct pci_dev *npdev, *gpdev = NULL, *gptmp; - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); - - if (!gpe || !gpdev) - return NULL; - - list_for_each_entry(npdev, &pbus->devices, bus_list) { - gptmp = pnv_pci_get_gpu_dev(npdev); - - if (gptmp != gpdev) - continue; - - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); - iommu_group_add_device(gpe->table_group.group, &npdev->dev); - } - - return gpe; -} - -/* Maximum number of nvlinks per npu */ -#define NV_MAX_LINKS 6 - -/* Maximum index of npu2 hosts in the system. 
Always < NV_MAX_NPUS */ -static int max_npu2_index; - -struct npu_context { - struct mm_struct *mm; - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; - struct mmu_notifier mn; - struct kref kref; - bool nmmu_flush; - - /* Callback to stop translation requests on a given GPU */ - void (*release_cb)(struct npu_context *context, void *priv); - - /* - * Private pointer passed to the above callback for usage by - * device drivers. - */ - void *priv; -}; - -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - -/* - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC - * if none are available. - */ -static int get_mmio_atsd_reg(struct npu *npu) -{ - int i; - - for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_bit(i, &npu->mmio_atsd_usage)) - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; - } - - return -ENOSPC; -} - -static void put_mmio_atsd_reg(struct npu *npu, int reg) -{ - clear_bit_unlock(reg, &npu->mmio_atsd_usage); -} - -/* MMIO ATSD register offsets */ -#define XTS_ATSD_AVA 1 -#define XTS_ATSD_STAT 2 - -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, - unsigned long launch, unsigned long va) -{ - struct npu *npu = mmio_atsd_reg->npu; - int reg = mmio_atsd_reg->reg; - - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); - eieio(); - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); -} - -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long pid, bool flush) -{ - int i; - unsigned long launch; - - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* IS set to invalidate matching PID */ - launch = PPC_BIT(12); - - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); - - /* AP */ - launch |= (u64) - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); - - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); - - /* Invalidating the entire process 
doesn't use a va */ - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); - } -} - -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long va, unsigned long pid, bool flush) -{ - int i; - unsigned long launch; - - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* IS set to invalidate target VA */ - launch = 0; - - /* PRS set to process scoped */ - launch |= PPC_BIT(13); - - /* AP */ - launch |= (u64) - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); - - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); - - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); - } -} - -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) - -static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - struct npu *npu; - int i, reg; - - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - } -} - -/* - * Acquires all the address translation shootdown (ATSD) registers required to - * launch an ATSD on all links this npu_context is active on. - */ -static void acquire_atsd_reg(struct npu_context *npu_context, - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i, j; - struct npu *npu; - struct pci_dev *npdev; - struct pnv_phb *nphb; - - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - /* - * There are no ordering requirements with respect to - * the setup of struct npu_context, but to ensure - * consistent behaviour we need to ensure npdev[][] is - * only read once. 
- */ - npdev = READ_ONCE(npu_context->npdev[i][j]); - if (!npdev) - continue; - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - mmio_atsd_reg[i].npu = npu; - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - while (mmio_atsd_reg[i].reg < 0) { - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } - break; - } - } -} - -/* - * Release previously acquired ATSD registers. To avoid deadlocks the registers - * must be released in the same order they were acquired above in - * acquire_atsd_reg. - */ -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i; - - for (i = 0; i <= max_npu2_index; i++) { - /* - * We can't rely on npu_context->npdev[][] being the same here - * as when acquire_atsd_reg() was called, hence we use the - * values stored in mmio_atsd_reg during the acquire phase - * rather than re-reading npdev[][]. - */ - if (mmio_atsd_reg[i].reg < 0) - continue; - - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); - } -} - -/* - * Invalidate either a single address or an entire PID depending on - * the value of va. - */ -static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address, bool flush) -{ - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; - unsigned long pid = npu_context->mm->context.id; - - if (npu_context->nmmu_flush) - /* - * Unfortunately the nest mmu does not support flushing specific - * addresses so we have to flush the whole mm once before - * shooting down the GPU translation. - */ - flush_all_mm(npu_context->mm); - - /* - * Loop over all the NPUs this process is active on and launch - * an invalidate. - */ - acquire_atsd_reg(npu_context, mmio_atsd_reg); - if (va) - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); - else - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); - - mmio_invalidate_wait(mmio_atsd_reg); - if (flush) { - /* - * The GPU requires two flush ATSDs to ensure all entries have - * been flushed. 
We use PID 0 as it will never be used for a - * process on the GPU. - */ - mmio_invalidate_pid(mmio_atsd_reg, 0, true); - mmio_invalidate_wait(mmio_atsd_reg); - mmio_invalidate_pid(mmio_atsd_reg, 0, true); - mmio_invalidate_wait(mmio_atsd_reg); - } - release_atsd_reg(mmio_atsd_reg); -} - -static void pnv_npu2_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - /* Call into device driver to stop requests to the NMMU */ - if (npu_context->release_cb) - npu_context->release_cb(npu_context, npu_context->priv); - - /* - * There should be no more translation requests for this PID, but we - * need to ensure any entries for it are removed from the TLB. - */ - mmio_invalidate(npu_context, 0, 0, true); -} - -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address, - pte_t pte) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - mmio_invalidate(npu_context, 1, address, true); -} - -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - unsigned long address; - - if (end - start > atsd_threshold) { - /* - * Just invalidate the entire PID if the address range is too - * large. - */ - mmio_invalidate(npu_context, 0, 0, true); - } else { - for (address = start; address < end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address, false); - - /* Do the flush only on the final addess == end */ - mmio_invalidate(npu_context, 1, address, true); - } -} - -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { - .release = pnv_npu2_mn_release, - .change_pte = pnv_npu2_mn_change_pte, - .invalidate_range = pnv_npu2_mn_invalidate_range, -}; - -/* - * Call into OPAL to setup the nmmu context for the current task in - * the NPU. 
This must be called to setup the context tables before the - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. - * - * A release callback should be registered to allow a device driver to - * be notified that it should not launch any new translation requests - * as the final TLB invalidate is about to occur. - * - * Returns an error if there no contexts are currently available or a - * npu_context which should be passed to pnv_npu2_handle_fault(). - * - * mmap_sem must be held in write mode and must not be called from interrupt - * context. - */ -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - void (*cb)(struct npu_context *, void *), - void *priv) -{ - int rc; - u32 nvlink_index; - struct device_node *nvlink_dn; - struct mm_struct *mm = current->mm; - struct pnv_phb *nphb; - struct npu *npu; - struct npu_context *npu_context; - - /* - * At present we don't support GPUs connected to multiple NPUs and I'm - * not sure the hardware does either. - */ - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - - if (!firmware_has_feature(FW_FEATURE_OPAL)) - return ERR_PTR(-ENODEV); - - if (!npdev) - /* No nvlink associated with this GPU device */ - return ERR_PTR(-ENODEV); - - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return ERR_PTR(-ENODEV); - - if (!mm || mm->context.id == 0) { - /* - * Kernel thread contexts are not supported and context id 0 is - * reserved on the GPU. - */ - return ERR_PTR(-EINVAL); - } - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - - /* - * Setup the NPU context table for a particular GPU. These need to be - * per-GPU as we need the tables to filter ATSDs when there are no - * active contexts on a particular GPU. It is safe for these to be - * called concurrently with destroy as the OPAL call takes appropriate - * locks and refcounts on init/destroy. 
- */ - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); - if (rc < 0) - return ERR_PTR(-ENOSPC); - - /* - * We store the npu pci device so we can more easily get at the - * associated npus. - */ - spin_lock(&npu_context_lock); - npu_context = mm->context.npu_context; - if (npu_context) { - if (npu_context->release_cb != cb || - npu_context->priv != priv) { - spin_unlock(&npu_context_lock); - opal_npu_destroy_context(nphb->opal_id, mm->context.id, - PCI_DEVID(gpdev->bus->number, - gpdev->devfn)); - return ERR_PTR(-EINVAL); - } - - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); - } - spin_unlock(&npu_context_lock); - - if (!npu_context) { - /* - * We can set up these fields without holding the - * npu_context_lock as the npu_context hasn't been returned to - * the caller meaning it can't be destroyed. Parallel allocation - * is protected against by mmap_sem. - */ - rc = -ENOMEM; - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (npu_context) { - kref_init(&npu_context->kref); - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - rc = __mmu_notifier_register(&npu_context->mn, mm); - } - - if (rc) { - kfree(npu_context); - opal_npu_destroy_context(nphb->opal_id, mm->context.id, - PCI_DEVID(gpdev->bus->number, - gpdev->devfn)); - return ERR_PTR(rc); - } - - mm->context.npu_context = npu_context; - } - - npu_context->release_cb = cb; - npu_context->priv = priv; - - /* - * npdev is a pci_dev pointer setup by the PCI code. We assign it to - * npdev[][] to indicate to the mmu notifiers that an invalidation - * should also be sent over this nvlink. The notifiers don't use any - * other fields in npu_context, so we just need to ensure that when they - * deference npu_context->npdev[][] it is either a valid pointer or - * NULL. 
- */ - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); - - if (!nphb->npu.nmmu_flush) { - /* - * If we're not explicitly flushing ourselves we need to mark - * the thread for global flushes - */ - npu_context->nmmu_flush = false; - mm_context_add_copro(mm); - } else - npu_context->nmmu_flush = true; - - return npu_context; -} -EXPORT_SYMBOL(pnv_npu2_init_context); - -static void pnv_npu2_release_context(struct kref *kref) -{ - struct npu_context *npu_context = - container_of(kref, struct npu_context, kref); - - if (!npu_context->nmmu_flush) - mm_context_remove_copro(npu_context->mm); - - npu_context->mm->context.npu_context = NULL; -} - -/* - * Destroy a context on the given GPU. May free the npu_context if it is no - * longer active on any GPUs. Must not be called from interrupt context. - */ -void pnv_npu2_destroy_context(struct npu_context *npu_context, - struct pci_dev *gpdev) -{ - int removed; - struct pnv_phb *nphb; - struct npu *npu; - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - struct device_node *nvlink_dn; - u32 nvlink_index; - - if (WARN_ON(!npdev)) - return; - - if (!firmware_has_feature(FW_FEATURE_OPAL)) - return; - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return; - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); - spin_lock(&npu_context_lock); - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); - spin_unlock(&npu_context_lock); - - /* - * We need to do this outside of pnv_npu2_release_context so that it is - * outside the spinlock as mmu_notifier_destroy uses SRCU. 
- */ - if (removed) { - mmu_notifier_unregister(&npu_context->mn, - npu_context->mm); - - kfree(npu_context); - } - -} -EXPORT_SYMBOL(pnv_npu2_destroy_context); - -/* - * Assumes mmap_sem is held for the contexts associated mm. - */ -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, - unsigned long *flags, unsigned long *status, int count) -{ - u64 rc = 0, result = 0; - int i, is_write; - struct page *page[1]; - - /* mmap_sem should be held so the struct_mm must be present */ - struct mm_struct *mm = context->mm; - - if (!firmware_has_feature(FW_FEATURE_OPAL)) - return -ENODEV; - - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); - - for (i = 0; i < count; i++) { - is_write = flags[i] & NPU2_WRITE; - rc = get_user_pages_remote(NULL, mm, ea[i], 1, - is_write ? FOLL_WRITE : 0, - page, NULL, NULL); - - /* - * To support virtualised environments we will have to do an - * access to the page to ensure it gets faulted into the - * hypervisor. For the moment virtualisation is not supported in - * other areas so leave the access out. 
- */ - if (rc != 1) { - status[i] = rc; - result = -EFAULT; - continue; - } - - status[i] = 0; - put_page(page[0]); - } - - return result; -} -EXPORT_SYMBOL(pnv_npu2_handle_fault); - -int pnv_npu2_init(struct pnv_phb *phb) -{ - unsigned int i; - u64 mmio_atsd; - struct device_node *dn; - struct pci_dev *gpdev; - static int npu_index; - uint64_t rc = 0; - - if (!atsd_threshold_dentry) { - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", - 0600, powerpc_debugfs_root, &atsd_threshold); - } - - phb->npu.nmmu_flush = - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); - for_each_child_of_node(phb->hose->dn, dn) { - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); - if (gpdev) { - rc = opal_npu_map_lpar(phb->opal_id, - PCI_DEVID(gpdev->bus->number, gpdev->devfn), - 0, 0); - if (rc) - dev_err(&gpdev->dev, - "Error %lld mapping device to LPAR\n", - rc); - } - } - - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd", - i, &mmio_atsd); i++) - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); - - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i); - phb->npu.mmio_atsd_count = i; - phb->npu.mmio_atsd_usage = 0; - npu_index++; - if (WARN_ON(npu_index >= NV_MAX_NPUS)) - return -ENOSPC; - max_npu2_index = npu_index; - phb->npu.index = npu_index; - - return 0; -} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 913175ba1c10..b6db65917bb4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1203,75 +1203,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) return pe; } -static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) -{ - int pe_num, found_pe = false, rc; - long rid; - struct pnv_ioda_pe *pe; - struct pci_dev *gpu_pdev; - struct pci_dn *npu_pdn; - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); - struct pnv_phb *phb = hose->private_data; - - /* - * 
Due to a hardware errata PE#0 on the NPU is reserved for - * error handling. This means we only have three PEs remaining - * which need to be assigned to four links, implying some - * links must share PEs. - * - * To achieve this we assign PEs such that NPUs linking the - * same GPU get assigned the same PE. - */ - gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); - for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { - pe = &phb->ioda.pe_array[pe_num]; - if (!pe->pdev) - continue; - - if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { - /* - * This device has the same peer GPU so should - * be assigned the same PE as the existing - * peer NPU. - */ - dev_info(&npu_pdev->dev, - "Associating to existing PE %x\n", pe_num); - pci_dev_get(npu_pdev); - npu_pdn = pci_get_pdn(npu_pdev); - rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; - npu_pdn->pe_number = pe_num; - phb->ioda.pe_rmap[rid] = pe->pe_number; - - /* Map the PE to this link */ - rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, - OpalPciBusAll, - OPAL_COMPARE_RID_DEVICE_NUMBER, - OPAL_COMPARE_RID_FUNCTION_NUMBER, - OPAL_MAP_PE); - WARN_ON(rc != OPAL_SUCCESS); - found_pe = true; - break; - } - } - - if (!found_pe) - /* - * Could not find an existing PE so allocate a new - * one. 
- */ - return pnv_ioda_setup_dev_PE(npu_pdev); - else - return pe; -} - -static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) -{ - struct pci_dev *pdev; - - list_for_each_entry(pdev, &bus->devices, bus_list) - pnv_ioda_setup_npu_PE(pdev); -} - static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; @@ -1281,13 +1212,6 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU_NVLINK) { - /* PE#0 is needed for error reporting */ - pnv_ioda_reserve_pe(phb, 0); - pnv_ioda_setup_npu_PEs(hose->bus); - if (phb->model == PNV_PHB_MODEL_NPU2) - pnv_npu2_init(phb); - } if (phb->type == PNV_PHB_NPU_OCAPI) { bus = hose->bus; list_for_each_entry(pdev, &bus->devices, bus_list) @@ -1871,9 +1795,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) } *pdev->dev.dma_mask = dma_mask; - /* Update peer npu devices */ - pnv_npu_try_dma_set_bypass(pdev, bypass); - return 0; } @@ -2119,14 +2040,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) -{ - if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) - pnv_pci_phb3_tce_invalidate_entire(phb, rm); - else - opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); -} - static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, @@ -2615,137 +2528,6 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; - -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe **ptmppe = opaque; - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdn || 
pdn->pe_number == IODA_INVALID_PE) - return 0; - - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - if (phb->type != PNV_PHB_NPU_NVLINK) - return 0; - - *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; - - return 1; -} - -/* - * This returns PE of associated NPU. - * This assumes that NPU is in the same IOMMU group with GPU and there is - * no other PEs. - */ -static struct pnv_ioda_pe *gpe_table_group_to_npe( - struct iommu_table_group *table_group) -{ - struct pnv_ioda_pe *npe = NULL; - int ret = iommu_group_for_each_dev(table_group->group, &npe, - gpe_table_group_to_npe_cb); - - BUG_ON(!ret || !npe); - - return npe; -} - -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, - int num, struct iommu_table *tbl) -{ - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); - int num2 = (num == 0) ? 1 : 0; - long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); - - if (ret) - return ret; - - if (table_group->tables[num2]) - pnv_npu_unset_window(npe, num2); - - ret = pnv_npu_set_window(npe, num, tbl); - if (ret) { - pnv_pci_ioda2_unset_window(table_group, num); - if (table_group->tables[num2]) - pnv_npu_set_window(npe, num2, - table_group->tables[num2]); - } - - return ret; -} - -static long pnv_pci_ioda2_npu_unset_window( - struct iommu_table_group *table_group, - int num) -{ - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); - int num2 = (num == 0) ? 1 : 0; - long ret = pnv_pci_ioda2_unset_window(table_group, num); - - if (ret) - return ret; - - if (!npe->table_group.tables[num]) - return 0; - - ret = pnv_npu_unset_window(npe, num); - if (ret) - return ret; - - if (table_group->tables[num2]) - ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); - - return ret; -} - -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) -{ - /* - * Detach NPU first as pnv_ioda2_take_ownership() will destroy - * the iommu_table if 32bit DMA is enabled. 
- */ - pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); - pnv_ioda2_take_ownership(table_group); -} - -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { - .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_pci_ioda2_create_table_userspace, - .set_window = pnv_pci_ioda2_npu_set_window, - .unset_window = pnv_pci_ioda2_npu_unset_window, - .take_ownership = pnv_ioda2_npu_take_ownership, - .release_ownership = pnv_ioda2_release_ownership, -}; - -static void pnv_pci_ioda_setup_iommu_api(void) -{ - struct pci_controller *hose, *tmp; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe, *gpe; - - /* - * Now we have all PHBs discovered, time to add NPU devices to - * the corresponding IOMMU groups. - */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->type != PNV_PHB_NPU_NVLINK) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - gpe = pnv_pci_npu_setup_iommu(pe); - if (gpe) - gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; - } - } -} -#else /* !CONFIG_IOMMU_API */ -static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) @@ -3242,7 +3024,6 @@ static void pnv_pci_enable_bridges(void) static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_iommu_api(); pnv_pci_ioda_create_dbgfs(); pnv_pci_enable_bridges(); @@ -3689,27 +3470,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; -static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) -{ - dev_err_once(&npdev->dev, - "%s operation unsupported for NVLink devices\n", - __func__); - return -EPERM; -} - -static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif - .enable_device_hook = 
pnv_pci_enable_device_hook, - .window_alignment = pnv_pci_window_alignment, - .reset_secondary_bus = pnv_pci_reset_secondary_bus, - .dma_set_mask = pnv_npu_dma_set_mask, - .shutdown = pnv_pci_ioda_shutdown, -}; - static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .window_alignment = pnv_pci_window_alignment, @@ -3931,9 +3691,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; switch (phb->type) { - case PNV_PHB_NPU_NVLINK: - hose->controller_ops = pnv_npu_ioda_controller_ops; - break; case PNV_PHB_NPU_OCAPI: hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; break; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8b37b28e3831..54f2935b7ac5 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -231,17 +231,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, #define pe_info(pe, fmt, ...) \ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) -/* Nvlink functions */ -extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); -extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); -extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, - struct iommu_table *tbl); -extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); -extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); -extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); -extern int pnv_npu2_init(struct pnv_phb *phb); - /* pci-ioda-tce.c */ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5
This code has been unused since it was merged and is in the way of
cleaning up the DMA code, thus remove it.

This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support
for Nvlink NPUs").

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/pci.h            |   3 -
 arch/powerpc/include/asm/powernv.h        |  23 -
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/npu-dma.c  | 999 ----------------------
 arch/powerpc/platforms/powernv/pci-ioda.c | 243 ------
 arch/powerpc/platforms/powernv/pci.h      |  11 -
 6 files changed, 1 insertion(+), 1280 deletions(-)
 delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c