Message ID | 7b7278f4-7639-62b3-8a35-e6f7f9afa998@sigmadesigns.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On 22/08/17 15:56, Marc Gonzalez wrote: > The MSI controller in Tango supports 256 message-signaled interrupts > and a single doorbell address. > > Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com> > --- > Changes from v9 to v10 > - Start from Bjorn's cleanup branch > - Clean up the MSI init and unused statements > - Based on top of v4.13-rc6 > > Hello Bjorn, > > This patch is almost identical to the patch reviewed by Marc Zyngier > on June 14 (10 weeks ago). I'm not sure he could review this patch > again in time for 4.14 (given his work load these past few weeks). Thanks for worrying about my workload. > > The host bridge part landed in 4.13, but the driver is useless > without MSI support (legacy interrupts are not supported). > Can you take it for 4.14? > > Regards. > --- > drivers/pci/host/pcie-tango.c | 191 +++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 189 insertions(+), 2 deletions(-) > > diff --git a/drivers/pci/host/pcie-tango.c b/drivers/pci/host/pcie-tango.c > index 6bbb81f06a53..d672271ad719 100644 > --- a/drivers/pci/host/pcie-tango.c > +++ b/drivers/pci/host/pcie-tango.c > @@ -1,12 +1,170 @@ > +#include <linux/irqchip/chained_irq.h> > +#include <linux/irqdomain.h> > #include <linux/pci-ecam.h> > #include <linux/delay.h> > +#include <linux/msi.h> > #include <linux/of.h> > > +#define MSI_MAX 256 > + > #define SMP8759_MUX 0x48 > #define SMP8759_TEST_OUT 0x74 > +#define SMP8759_STATUS 0x80 > +#define SMP8759_ENABLE 0xa0 > +#define SMP8759_DOORBELL 0xa002e07c Why is this hardcoded and not coming from the device-tree, just like any other address property? 
> > struct tango_pcie { > - void __iomem *base; > + DECLARE_BITMAP(used_msi, MSI_MAX); > + spinlock_t used_msi_lock; > + void __iomem *base; > + struct irq_domain *dom; > +}; > + > +static void tango_msi_isr(struct irq_desc *desc) > +{ > + struct irq_chip *chip = irq_desc_get_chip(desc); > + struct tango_pcie *pcie = irq_desc_get_handler_data(desc); > + unsigned long status, base, virq, idx, pos = 0; > + > + chained_irq_enter(chip, desc); > + spin_lock(&pcie->used_msi_lock); > + > + while ((pos = find_next_bit(pcie->used_msi, MSI_MAX, pos)) < MSI_MAX) { > + base = round_down(pos, 32); > + status = readl_relaxed(pcie->base + SMP8759_STATUS + base / 8); > + for_each_set_bit(idx, &status, 32) { > + virq = irq_find_mapping(pcie->dom, base + idx); > + generic_handle_irq(virq); > + } > + pos = base + 32; > + } > + > + spin_unlock(&pcie->used_msi_lock); > + chained_irq_exit(chip, desc); > +} > + > +static void tango_ack(struct irq_data *d) > +{ > + struct tango_pcie *pcie = d->chip_data; > + u32 offset = (d->hwirq / 32) * 4; > + u32 bit = BIT(d->hwirq % 32); > + > + writel_relaxed(bit, pcie->base + SMP8759_STATUS + offset); > +} > + > +static void update_msi_enable(struct irq_data *d, bool unmask) > +{ > + unsigned long flags; > + struct tango_pcie *pcie = d->chip_data; > + u32 offset = (d->hwirq / 32) * 4; > + u32 bit = BIT(d->hwirq % 32); > + u32 val; > + > + spin_lock_irqsave(&pcie->used_msi_lock, flags); > + val = readl_relaxed(pcie->base + SMP8759_ENABLE + offset); > + val = unmask ? 
val | bit : val & ~bit; > + writel_relaxed(val, pcie->base + SMP8759_ENABLE + offset); > + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); > +} > + > +static void tango_mask(struct irq_data *d) > +{ > + update_msi_enable(d, false); > +} > + > +static void tango_unmask(struct irq_data *d) > +{ > + update_msi_enable(d, true); > +} > + > +static int tango_set_affinity(struct irq_data *d, const struct cpumask *mask, > + bool force) > +{ > + return -EINVAL; > +} > + > +static void tango_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) > +{ > + msg->address_lo = lower_32_bits(SMP8759_DOORBELL); > + msg->address_hi = upper_32_bits(SMP8759_DOORBELL); > + msg->data = d->hwirq; > +} > + > +static struct irq_chip tango_chip = { > + .irq_ack = tango_ack, > + .irq_mask = tango_mask, > + .irq_unmask = tango_unmask, > + .irq_set_affinity = tango_set_affinity, > + .irq_compose_msi_msg = tango_compose_msi_msg, > +}; > + > +static void msi_ack(struct irq_data *d) > +{ > + irq_chip_ack_parent(d); > +} > + > +static void msi_mask(struct irq_data *d) > +{ > + pci_msi_mask_irq(d); > + irq_chip_mask_parent(d); > +} > + > +static void msi_unmask(struct irq_data *d) > +{ > + pci_msi_unmask_irq(d); > + irq_chip_unmask_parent(d); > +} > + > +static struct irq_chip msi_chip = { > + .name = "MSI", > + .irq_ack = msi_ack, > + .irq_mask = msi_mask, > + .irq_unmask = msi_unmask, > +}; > + > +static struct msi_domain_info msi_dom_info = { > + .flags = MSI_FLAG_PCI_MSIX > + | MSI_FLAG_USE_DEF_DOM_OPS > + | MSI_FLAG_USE_DEF_CHIP_OPS, > + .chip = &msi_chip, > +}; > + > +static int tango_irq_domain_alloc(struct irq_domain *dom, unsigned int virq, > + unsigned int nr_irqs, void *args) > +{ > + struct tango_pcie *pcie = dom->host_data; > + unsigned long flags; > + int pos; > + > + spin_lock_irqsave(&pcie->used_msi_lock, flags); > + pos = find_first_zero_bit(pcie->used_msi, MSI_MAX); > + if (pos >= MSI_MAX) { > + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); > + return -ENOSPC; > + } 
> + __set_bit(pos, pcie->used_msi); > + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); > + irq_domain_set_info(dom, virq, pos, &tango_chip, > + pcie, handle_edge_irq, NULL, NULL); > + > + return 0; > +} > + > +static void tango_irq_domain_free(struct irq_domain *dom, unsigned int virq, > + unsigned int nr_irqs) > +{ > + unsigned long flags; > + struct irq_data *d = irq_domain_get_irq_data(dom, virq); > + struct tango_pcie *pcie = d->chip_data; > + > + spin_lock_irqsave(&pcie->used_msi_lock, flags); > + __clear_bit(d->hwirq, pcie->used_msi); > + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); > +} > + > +static const struct irq_domain_ops dom_ops = { > + .alloc = tango_irq_domain_alloc, > + .free = tango_irq_domain_free, > }; > > static int smp8759_config_read(struct pci_bus *bus, unsigned int devfn, > @@ -76,7 +234,9 @@ static int tango_pcie_probe(struct platform_device *pdev) > struct device *dev = &pdev->dev; > struct tango_pcie *pcie; > struct resource *res; > - int ret; > + struct irq_domain *msi_dom, *irq_dom; > + struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); > + int ret, reg, virq; > > dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); > add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); > @@ -95,6 +255,33 @@ static int tango_pcie_probe(struct platform_device *pdev) > if (!tango_pcie_link_up(pcie)) > return -ENODEV; > > + for (reg = 0; reg < MSI_MAX / 8; reg += 4) > + writel_relaxed(0, pcie->base + SMP8759_ENABLE + reg); > + > + virq = platform_get_irq(pdev, 1); > + if (virq <= 0) { > + dev_err(dev, "Failed to map IRQ\n"); > + return -ENXIO; > + } > + > + irq_dom = irq_domain_create_linear(fwnode, MSI_MAX, &dom_ops, pcie); > + if (!irq_dom) { > + dev_err(dev, "Failed to create IRQ domain\n"); > + return -ENOMEM; > + } > + > + msi_dom = pci_msi_create_irq_domain(fwnode, &msi_dom_info, irq_dom); > + if (!msi_dom) { > + dev_err(dev, "Failed to create MSI domain\n"); > + irq_domain_remove(irq_dom); > + 
return -ENOMEM; > + } > + > + pcie->dom = irq_dom; > + spin_lock_init(&pcie->used_msi_lock); > + > + irq_set_chained_handler_and_data(virq, tango_msi_isr, pcie); > + > return pci_host_common_probe(pdev, &smp8759_ecam_ops); > } > > The remark above notwithstanding: Acked-by: Marc Zyngier <marc.zyngier@arm.com> M.
On 22/08/2017 18:29, Marc Zyngier wrote: > On 22/08/17 15:56, Marc Gonzalez wrote: > >> #define SMP8759_MUX 0x48 >> #define SMP8759_TEST_OUT 0x74 >> +#define SMP8759_STATUS 0x80 >> +#define SMP8759_ENABLE 0xa0 >> +#define SMP8759_DOORBELL 0xa002e07c > > Why is this hardcoded and not coming from the device-tree, just like any > other address property? Since this bus address is software-configurable, I didn't think it belonged in the DT. Also, I didn't see anything similar in other binding docs, especially Documentation/devicetree/bindings/interrupt-controller/msi.txt Regards.
On Tue, Aug 22 2017 at 8:02:18 pm BST, Marc Gonzalez <marc_gonzalez@sigmadesigns.com> wrote: > On 22/08/2017 18:29, Marc Zyngier wrote: > >> On 22/08/17 15:56, Marc Gonzalez wrote: >> >>> #define SMP8759_MUX 0x48 >>> #define SMP8759_TEST_OUT 0x74 >>> +#define SMP8759_STATUS 0x80 >>> +#define SMP8759_ENABLE 0xa0 >>> +#define SMP8759_DOORBELL 0xa002e07c >> >> Why is this hardcoded and not coming from the device-tree, just like any >> other address property? > > Since this bus address is software-configurable, I didn't think > it belonged in the DT. Also, I didn't see anything similar in > other binding docs, especially > > Documentation/devicetree/bindings/interrupt-controller/msi.txt If that's software configurable, how on Earth did you pick the address? How do you ensure that it doesn't conflict with DMA? How is it configured into the RC? M.
On 22/08/2017 16:56, Marc Gonzalez wrote: > @@ -76,7 +234,9 @@ static int tango_pcie_probe(struct platform_device *pdev) > struct device *dev = &pdev->dev; > struct tango_pcie *pcie; > struct resource *res; > - int ret; > + struct irq_domain *msi_dom, *irq_dom; > + struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); > + int ret, reg, virq; > > dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); > add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); CC drivers/pci/host/pcie-tango.o drivers/pci/host/pcie-tango.c: In function 'tango_pcie_probe': drivers/pci/host/pcie-tango.c:257:6: warning: unused variable 'ret' [-Wunused-variable] int ret, reg, virq; ^~~ Hmmm, dunno how I managed to miss that... Is the kbuild test robot enjoying a well-deserved vacation? Bjorn, if/when you take the patch, can you first apply this fixup: @@ -254,7 +254,7 @@ static int tango_pcie_probe(struct platform_device *pdev) struct resource *res; struct irq_domain *msi_dom, *irq_dom; struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); - int ret, reg, virq; + int reg, virq; dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); Regards.
On Wed, Aug 23, 2017 at 02:59:42PM +0200, Marc Gonzalez wrote: > On 22/08/2017 16:56, Marc Gonzalez wrote: > > > @@ -76,7 +234,9 @@ static int tango_pcie_probe(struct platform_device *pdev) > > struct device *dev = &pdev->dev; > > struct tango_pcie *pcie; > > struct resource *res; > > - int ret; > > + struct irq_domain *msi_dom, *irq_dom; > > + struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); > > + int ret, reg, virq; > > > > dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); > > add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); > > CC drivers/pci/host/pcie-tango.o > drivers/pci/host/pcie-tango.c: In function 'tango_pcie_probe': > drivers/pci/host/pcie-tango.c:257:6: warning: unused variable 'ret' [-Wunused-variable] > int ret, reg, virq; > ^~~ > > Hmmm, dunno how I managed to miss that... > Is the kbuild test robot enjoying a well-deserved vacation? > > Bjorn, if/when you take the patch, can you first apply this fixup: Sure, no problem. > @@ -254,7 +254,7 @@ static int tango_pcie_probe(struct platform_device *pdev) > struct resource *res; > struct irq_domain *msi_dom, *irq_dom; > struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); > - int ret, reg, virq; > + int reg, virq; > > dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); > add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); > > > Regards. >
On Tue, Aug 22, 2017 at 09:03:41PM +0100, Marc Zyngier wrote: > On Tue, Aug 22 2017 at 8:02:18 pm BST, Marc Gonzalez <marc_gonzalez@sigmadesigns.com> wrote: > > On 22/08/2017 18:29, Marc Zyngier wrote: > > > >> On 22/08/17 15:56, Marc Gonzalez wrote: > >> > >>> #define SMP8759_MUX 0x48 > >>> #define SMP8759_TEST_OUT 0x74 > >>> +#define SMP8759_STATUS 0x80 > >>> +#define SMP8759_ENABLE 0xa0 > >>> +#define SMP8759_DOORBELL 0xa002e07c > >> > >> Why is this hardcoded and not coming from the device-tree, just like any > >> other address property? > > > > Since this bus address is software-configurable, I didn't think > > it belonged in the DT. Also, I didn't see anything similar in > > other binding docs, especially > > > > Documentation/devicetree/bindings/interrupt-controller/msi.txt > > If that's software configurable, how on Earth did you pick the address? > How do you ensure that it doesn't conflict with DMA? How is it > configured into the RC? But we *do* need to resolve this. This does seem like an address that shouldn't be hard-coded into the driver. Since this driver is programming the address into an MSI message, but not into the receiver of that message, there's a coordination issue between this driver and whatever other software does that receiver configuration. Bjorn
On 24/08/2017 19:04, Bjorn Helgaas wrote: > On Tue, Aug 22, 2017 at 09:03:41PM +0100, Marc Zyngier wrote: >> Marc Gonzalez wrote: >>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>> >>>>> #define SMP8759_MUX 0x48 >>>>> #define SMP8759_TEST_OUT 0x74 >>>>> +#define SMP8759_STATUS 0x80 >>>>> +#define SMP8759_ENABLE 0xa0 >>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>> >>>> Why is this hardcoded and not coming from the device-tree, just like any >>>> other address property? >>> >>> Since this bus address is software-configurable, I didn't think >>> it belonged in the DT. Also, I didn't see anything similar in >>> other binding docs, especially >>> >>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >> >> If that's software configurable, how on Earth did you pick the address? >> How do you ensure that it doesn't conflict with DMA? How is it >> configured into the RC? > > But we *do* need to resolve this. This does seem like an address that > shouldn't be hard-coded into the driver. Since this driver is > programming the address into an MSI message, but not into the receiver > of that message, there's a coordination issue between this driver and > whatever other software does that receiver configuration. OK. I'll move the doorbell address to the DT for v11. What property should be used for this address? sigma,doorbell ? Or maybe I can put it in reg, since I have a 1:1 mapping between bus and cpu addresses? git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ returns nothing. Regards.
On 24 August 2017 at 18:51, Marc Gonzalez <marc_gonzalez@sigmadesigns.com> wrote: > On 24/08/2017 19:04, Bjorn Helgaas wrote: >> On Tue, Aug 22, 2017 at 09:03:41PM +0100, Marc Zyngier wrote: >>> Marc Gonzalez wrote: >>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>> >>>>>> #define SMP8759_MUX 0x48 >>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>> +#define SMP8759_STATUS 0x80 >>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>> >>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>> other address property? >>>> >>>> Since this bus address is software-configurable, I didn't think >>>> it belonged in the DT. Also, I didn't see anything similar in >>>> other binding docs, especially >>>> >>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>> >>> If that's software configurable, how on Earth did you pick the address? >>> How do you ensure that it doesn't conflict with DMA? How is it >>> configured into the RC? >> >> But we *do* need to resolve this. This does seem like an address that >> shouldn't be hard-coded into the driver. Since this driver is >> programming the address into an MSI message, but not into the receiver >> of that message, there's a coordination issue between this driver and >> whatever other software does that receiver configuration. > > OK. I'll move the doorbell address to the DT for v11. > > What property should be used for this address? > > sigma,doorbell ? > > Or maybe I can put it in reg, since I have a 1:1 mapping > between bus and cpu addresses? > > git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ > returns nothing. > You haven't answered the question yet: you stated that the doorbell address is software configurable, yet your code does not seem to configure it. It only returns the doorbell address so that it gets communicated to the downstream devices. 
So how does the RC know which address is special, so it can trigger on inbound writes hitting that address and assert the SPI?
On 24/08/2017 20:35, Ard Biesheuvel wrote: > On 24 August 2017 at 18:51, Marc Gonzalez wrote: >> On 24/08/2017 19:04, Bjorn Helgaas wrote: >>> On Tue, Aug 22, 2017 Marc Zyngier wrote: >>>> Marc Gonzalez wrote: >>>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>>> >>>>>>> #define SMP8759_MUX 0x48 >>>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>>> +#define SMP8759_STATUS 0x80 >>>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>>> >>>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>>> other address property? >>>>> >>>>> Since this bus address is software-configurable, I didn't think >>>>> it belonged in the DT. Also, I didn't see anything similar in >>>>> other binding docs, especially >>>>> >>>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>>> >>>> If that's software configurable, how on Earth did you pick the address? >>>> How do you ensure that it doesn't conflict with DMA? How is it >>>> configured into the RC? >>> >>> But we *do* need to resolve this. This does seem like an address that >>> shouldn't be hard-coded into the driver. Since this driver is >>> programming the address into an MSI message, but not into the receiver >>> of that message, there's a coordination issue between this driver and >>> whatever other software does that receiver configuration. >> >> OK. I'll move the doorbell address to the DT for v11. >> >> What property should be used for this address? >> >> sigma,doorbell ? >> >> Or maybe I can put it in reg, since I have a 1:1 mapping >> between bus and cpu addresses? >> >> git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ >> returns nothing. > > You haven't answered the question yet: you stated that the doorbell > address is software configurable, yet your code does not seem to > configure it. It only returns the doorbell address so that it gets > communicated to the downstream devices. 
> > So how does the RC know which address is special, so it can trigger on > inbound writes hitting that address and assert the SPI ? The CPU address of the MSI doorbell address is 0x2e07c i.e. within the reg space of the PCIe controller block. As I discussed back in March, the RC implements an odd bus-to-system mapping. RC BAR0 defines a window in PCI address space (max 1GB). Accesses outside this window are silently ignored. The window is divided into 8 "regions" and there are 8 registers defining the offset into CPU space. In pseudo code, assuming pci_address is within the window defined by BAR0: cpu_address map_bus_to_system(pci_address) { temp = pci_address - BAR0.base region = temp / region_size offset = temp % region_size cpu_address = region_reg[region] + offset return cpu_address } The current setup is: DRAM at 0x80000000-0xa0000000 BAR0.base = 0x80000000 REGION[0] = 0x80000000 REGION[1] = 0x88000000 REGION[2] = 0x90000000 REGION[3] = 0x98000000 REGION[4] = 0x0 (This map means 1:1 identity for DRAM addresses.) Thus when a device writes to 0xa002e07c (region 4) the write is forwarded to 0x2e07c. Regards.
On Thu, Aug 24 2017 at 10:53:16 pm BST, Mason <slash.tmp@free.fr> wrote: > On 24/08/2017 20:35, Ard Biesheuvel wrote: >> On 24 August 2017 at 18:51, Marc Gonzalez wrote: >>> On 24/08/2017 19:04, Bjorn Helgaas wrote: >>>> On Tue, Aug 22, 2017 Marc Zyngier wrote: >>>>> Marc Gonzalez wrote: >>>>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>>>> >>>>>>>> #define SMP8759_MUX 0x48 >>>>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>>>> +#define SMP8759_STATUS 0x80 >>>>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>>>> >>>>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>>>> other address property? >>>>>> >>>>>> Since this bus address is software-configurable, I didn't think >>>>>> it belonged in the DT. Also, I didn't see anything similar in >>>>>> other binding docs, especially >>>>>> >>>>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>>>> >>>>> If that's software configurable, how on Earth did you pick the address? >>>>> How do you ensure that it doesn't conflict with DMA? How is it >>>>> configured into the RC? >>>> >>>> But we *do* need to resolve this. This does seem like an address that >>>> shouldn't be hard-coded into the driver. Since this driver is >>>> programming the address into an MSI message, but not into the receiver >>>> of that message, there's a coordination issue between this driver and >>>> whatever other software does that receiver configuration. >>> >>> OK. I'll move the doorbell address to the DT for v11. >>> >>> What property should be used for this address? >>> >>> sigma,doorbell ? >>> >>> Or maybe I can put it in reg, since I have a 1:1 mapping >>> between bus and cpu addresses? >>> >>> git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ >>> returns nothing. 
>> >> You haven't answered the question yet: you stated that the doorbell >> address is software configurable, yet your code does not seem to >> configure it. It only returns the doorbell address so that it gets >> communicated to the downstream devices. >> >> So how does the RC know which address is special, so it can trigger on >> inbound writes hitting that address and assert the SPI ? > > The CPU address of the MSI doorbell address is 0x2e07c > i.e. within the reg space of the PCIe controller block. Which you describe in DT already, right? So why aren't you using an offset in this region as your MSI doorbell (potentially applying an offset, see below)? > > As I discussed back in March, the RC implements an odd > bus-to-system mapping. > > RC BAR0 defines a window in PCI address space (max 1GB). > Accesses outside this window are silently ignored. > The window is divided into 8 "regions" and there are 8 > registers defining the offset into CPU space. > > In pseudo code, assuming pci_address is within the > window defined by BAR0: > > cpu_address map_bus_to_system(pci_address) > { > temp = pci_address - BAR0.base > region = temp / region_size > offset = temp % region_size > cpu_address = region_reg[region] + offset > return cpu_address > } > > The current setup is: > > DRAM at 0x80000000-0xa0000000 > BAR0.base = 0x80000000 > REGION[0] = 0x80000000 > REGION[1] = 0x88000000 > REGION[2] = 0x90000000 > REGION[3] = 0x98000000 > REGION[4] = 0x0 > > (This map means 1:1 identity for DRAM addresses.) > > Thus when a device writes to 0xa002e07c (region 4) > the write is forwarded to 0x2e07c. But how do you find out about the 0xa0000000 offset? You must make sure that the provided address is outside of RAM, should you end up on a system with more than 1GB of RAM. M.
On 25/08/2017 09:54, Marc Zyngier wrote: > On Thu, Aug 24 2017 at 10:53:16 pm BST, Mason <slash.tmp@free.fr> wrote: >> On 24/08/2017 20:35, Ard Biesheuvel wrote: >>> On 24 August 2017 at 18:51, Marc Gonzalez wrote: >>>> On 24/08/2017 19:04, Bjorn Helgaas wrote: >>>>> On Tue, Aug 22, 2017 Marc Zyngier wrote: >>>>>> Marc Gonzalez wrote: >>>>>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>>>>> >>>>>>>>> #define SMP8759_MUX 0x48 >>>>>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>>>>> +#define SMP8759_STATUS 0x80 >>>>>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>>>>> >>>>>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>>>>> other address property? >>>>>>> >>>>>>> Since this bus address is software-configurable, I didn't think >>>>>>> it belonged in the DT. Also, I didn't see anything similar in >>>>>>> other binding docs, especially >>>>>>> >>>>>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>>>>> >>>>>> If that's software configurable, how on Earth did you pick the address? >>>>>> How do you ensure that it doesn't conflict with DMA? How is it >>>>>> configured into the RC? >>>>> >>>>> But we *do* need to resolve this. This does seem like an address that >>>>> shouldn't be hard-coded into the driver. Since this driver is >>>>> programming the address into an MSI message, but not into the receiver >>>>> of that message, there's a coordination issue between this driver and >>>>> whatever other software does that receiver configuration. >>>> >>>> OK. I'll move the doorbell address to the DT for v11. >>>> >>>> What property should be used for this address? >>>> >>>> sigma,doorbell ? >>>> >>>> Or maybe I can put it in reg, since I have a 1:1 mapping >>>> between bus and cpu addresses? >>>> >>>> git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ >>>> returns nothing. 
>>> >>> You haven't answered the question yet: you stated that the doorbell >>> address is software configurable, yet your code does not seem to >>> configure it. It only returns the doorbell address so that it gets >>> communicated to the downstream devices. >>> >>> So how does the RC know which address is special, so it can trigger on >>> inbound writes hitting that address and assert the SPI ? >> >> The CPU address of the MSI doorbell address is 0x2e07c >> i.e. within the reg space of the PCIe controller block. > > Which you describe in DT already, right? So why aren't you using an > offset in this region as your MSI doorbell (potentially applying an > offset, see below)? Yes, the controller is described in DT: pcie@2e000 { compatible = "sigma,smp8759-pcie"; reg = <0x50000000 0x400000>, <0x2e000 0x100>; IIUC, you're saying I don't need the doorbell address explicitly in the DT, because I can compute: 0x2e000 (from the DT) + 0x7c (offset within the block) OK, that sounds right. Then there is the matter of the region offset, i.e. 0xa0000000 in my current code. It might also be worth keeping in mind that there is a second revision of the PCIe controller that handles the doorbell differently. In rev2, I just pick an arbitrary address within the window, program that bus address into the controller, and the controller knows to forward that single address to the right place. See "[RFC PATCH v0.2] PCI: Add support for tango PCIe host bridge" for an example of this. Typically, I pick the first address of the window, BAR0.base, since I assume no device will ever need to read/write the first word of RAM. >> As I discussed back in March, the RC implements an odd >> bus-to-system mapping. >> >> RC BAR0 defines a window in PCI address space (max 1GB). >> Accesses outside this window are silently ignored. >> The window is divided into 8 "regions" and there are 8 >> registers defining the offset into CPU space. 
>> >> In pseudo code, assuming pci_address is within the >> window defined by BAR0: >> >> cpu_address map_bus_to_system(pci_address) >> { >> temp = pci_address - BAR0.base >> region = temp / region_size >> offset = temp % region_size >> cpu_address = region_reg[region] + offset >> return cpu_address >> } >> >> The current setup is: >> >> DRAM at 0x80000000-0xa0000000 >> BAR0.base = 0x80000000 >> REGION[0] = 0x80000000 >> REGION[1] = 0x88000000 >> REGION[2] = 0x90000000 >> REGION[3] = 0x98000000 >> REGION[4] = 0x0 >> >> (This map means 1:1 identity for DRAM addresses.) >> >> Thus when a device writes to 0xa002e07c (region 4) >> the write is forwarded to 0x2e07c. > > But how do you find out about the 0xa0000000 offset? You must make sure > that the provided address is outside of RAM, should you end-up on a > system more than 1GB of RAM. You're right, I've swept this issue under the rug so far. The boards typically come with either - two 512MB DIMMs - two 1GB DIMMs (there may be other setups I'm not aware of, e.g. with a single memory module). In the DT for my dev board, I describe *all* of the RAM. memory@80000000 { device_type = "memory"; reg = <0x80000000 0x80000000>; /* 2 GB */ }; But actually, Linux is only given to manage a fraction of this memory, as some of it is for other processors and DSPs, and a large part is for video decoder buffers. In the end, Linux manages, typically 128MB, 256MB, 512MB, 2x128MB, or 2x256MB (there may be other setups I'm not aware of) And the actual config is passed to Linux through a mem= command-line directive. It is not clear to me, as I discussed with Ard, how the Linux driver is supposed to make this all work. Maybe I can have some platform code that walks the different RAM areas available to Linux, and sets up the appropriate physical-to-dma mappings? Regards.
On 25/08/2017 09:54, Marc Zyngier wrote: > On Thu, Aug 24 2017 at 10:53:16 pm BST, Mason <slash.tmp@free.fr> wrote: >> On 24/08/2017 20:35, Ard Biesheuvel wrote: >>> On 24 August 2017 at 18:51, Marc Gonzalez wrote: >>>> On 24/08/2017 19:04, Bjorn Helgaas wrote: >>>>> On Tue, Aug 22, 2017 Marc Zyngier wrote: >>>>>> Marc Gonzalez wrote: >>>>>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>>>>> >>>>>>>>> #define SMP8759_MUX 0x48 >>>>>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>>>>> +#define SMP8759_STATUS 0x80 >>>>>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>>>>> >>>>>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>>>>> other address property? >>>>>>> >>>>>>> Since this bus address is software-configurable, I didn't think >>>>>>> it belonged in the DT. Also, I didn't see anything similar in >>>>>>> other binding docs, especially >>>>>>> >>>>>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>>>>> >>>>>> If that's software configurable, how on Earth did you pick the address? >>>>>> How do you ensure that it doesn't conflict with DMA? How is it >>>>>> configured into the RC? >>>>> >>>>> But we *do* need to resolve this. This does seem like an address that >>>>> shouldn't be hard-coded into the driver. Since this driver is >>>>> programming the address into an MSI message, but not into the receiver >>>>> of that message, there's a coordination issue between this driver and >>>>> whatever other software does that receiver configuration. >>>> >>>> OK. I'll move the doorbell address to the DT for v11. >>>> >>>> What property should be used for this address? >>>> >>>> sigma,doorbell ? >>>> >>>> Or maybe I can put it in reg, since I have a 1:1 mapping >>>> between bus and cpu addresses? >>>> >>>> git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ >>>> returns nothing. 
>>> >>> You haven't answered the question yet: you stated that the doorbell >>> address is software configurable, yet your code does not seem to >>> configure it. It only returns the doorbell address so that it gets >>> communicated to the downstream devices. >>> >>> So how does the RC know which address is special, so it can trigger on >>> inbound writes hitting that address and assert the SPI ? >> >> The CPU address of the MSI doorbell address is 0x2e07c >> i.e. within the reg space of the PCIe controller block. > > Which you describe in DT already, right? So why aren't you using an > offset in this region as your MSI doorbell (potentially applying an > offset, see below)? > > >> As I discussed back in March, the RC implements an odd >> bus-to-system mapping. >> >> RC BAR0 defines a window in PCI address space (max 1GB). >> Accesses outside this window are silently ignored. >> The window is divided into 8 "regions" and there are 8 >> registers defining the offset into CPU space. >> >> In pseudo code, assuming pci_address is within the >> window defined by BAR0: >> >> cpu_address map_bus_to_system(pci_address) >> { >> temp = pci_address - BAR0.base >> region = temp / region_size >> offset = temp % region_size >> cpu_address = region_reg[region] + offset >> return cpu_address >> } >> >> The current setup is: >> >> DRAM at 0x80000000-0xa0000000 >> BAR0.base = 0x80000000 >> REGION[0] = 0x80000000 >> REGION[1] = 0x88000000 >> REGION[2] = 0x90000000 >> REGION[3] = 0x98000000 >> REGION[4] = 0x0 >> >> (This map means 1:1 identity for DRAM addresses.) >> >> Thus when a device writes to 0xa002e07c (region 4) >> the write is forwarded to 0x2e07c. > > But how do you find out about the 0xa0000000 offset? You must make sure > that the provided address is outside of RAM, should you end up on a > system with more than 1GB of RAM.
Robin wrote a prophetic post back in March: http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html > The appropriate DT property would be "dma-ranges", i.e. > > pci@... { > ... > dma-ranges = <(PCI bus address) (CPU phys address) (size)>; > } The dma-ranges property seems to be exactly what I'm looking for: Restrict DMA to the first X MB of RAM (use a bounce buffer for other physical addresses). I added the following property to my PCIe node dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ But it does not work. Arg! My PCIe controller driver seems to be correctly calling of_dma_get_range: [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) of_dma_get_range() is called on the pcie node (which is expected) but after parsing n_addr_cells and n_size_cells in the while loop, the code jumps to the parent node ("soc")... while my property is attached to the pcie node... [ 0.507754] of_dma_get_range: node=dfbf74cc np=dfbf74cc name=/soc/pcie@2e000 name2=/soc/pcie@2e000 ... 
[ 0.509162] __of_find_property: node=soc find=#address-cells prop=compatible [ 0.509168] __of_find_property: node=soc find=#address-cells prop=interrupt-parent [ 0.509173] __of_find_property: node=soc find=#address-cells prop=#address-cells [ 0.509178] __of_find_property: node=soc find=#size-cells prop=compatible [ 0.509182] __of_find_property: node=soc find=#size-cells prop=interrupt-parent [ 0.509186] __of_find_property: node=soc find=#size-cells prop=#address-cells [ 0.509190] __of_find_property: node=soc find=#size-cells prop=#size-cells [ 0.509195] __of_find_property: node=soc find=dma-ranges prop=compatible [ 0.509199] __of_find_property: node=soc find=dma-ranges prop=interrupt-parent [ 0.509203] __of_find_property: node=soc find=dma-ranges prop=#address-cells [ 0.509207] __of_find_property: node=soc find=dma-ranges prop=#size-cells [ 0.509211] __of_find_property: node=soc find=dma-ranges prop=ranges [ 0.509215] __of_find_property: node=soc find=dma-ranges prop=name [ 0.509219] dma-ranges= (null) http://elixir.free-electrons.com/linux/latest/source/drivers/of/address.c#L838 What am I missing? Regards.
On 25/08/17 16:01, Mason wrote: > On 25/08/2017 09:54, Marc Zyngier wrote: >> On Thu, Aug 24 2017 at 10:53:16 pm BST, Mason <slash.tmp@free.fr> wrote: >>> On 24/08/2017 20:35, Ard Biesheuvel wrote: >>>> On 24 August 2017 at 18:51, Marc Gonzalez wrote: >>>>> On 24/08/2017 19:04, Bjorn Helgaas wrote: >>>>>> On Tue, Aug 22, 2017 Marc Zyngier wrote: >>>>>>> Marc Gonzalez wrote: >>>>>>>> On 22/08/2017 18:29, Marc Zyngier wrote: >>>>>>>>> On 22/08/17 15:56, Marc Gonzalez wrote: >>>>>>>>> >>>>>>>>>> #define SMP8759_MUX 0x48 >>>>>>>>>> #define SMP8759_TEST_OUT 0x74 >>>>>>>>>> +#define SMP8759_STATUS 0x80 >>>>>>>>>> +#define SMP8759_ENABLE 0xa0 >>>>>>>>>> +#define SMP8759_DOORBELL 0xa002e07c >>>>>>>>> >>>>>>>>> Why is this hardcoded and not coming from the device-tree, just like any >>>>>>>>> other address property? >>>>>>>> >>>>>>>> Since this bus address is software-configurable, I didn't think >>>>>>>> it belonged in the DT. Also, I didn't see anything similar in >>>>>>>> other binding docs, especially >>>>>>>> >>>>>>>> Documentation/devicetree/bindings/interrupt-controller/msi.txt >>>>>>> >>>>>>> If that's software configurable, how on Earth did you pick the address? >>>>>>> How do you ensure that it doesn't conflict with DMA? How is it >>>>>>> configured into the RC? >>>>>> >>>>>> But we *do* need to resolve this. This does seem like an address that >>>>>> shouldn't be hard-coded into the driver. Since this driver is >>>>>> programming the address into an MSI message, but not into the receiver >>>>>> of that message, there's a coordination issue between this driver and >>>>>> whatever other software does that receiver configuration. >>>>> >>>>> OK. I'll move the doorbell address to the DT for v11. >>>>> >>>>> What property should be used for this address? >>>>> >>>>> sigma,doorbell ? >>>>> >>>>> Or maybe I can put it in reg, since I have a 1:1 mapping >>>>> between bus and cpu addresses? 
>>>>> >>>>> git grep -i doorbell arch/arm/boot/dts/ arch/arm64/boot/dts/ >>>>> returns nothing. >>>> >>>> You haven't answered the question yet: you stated that the doorbell >>>> address is software configurable, yet your code does not seem to >>>> configure it. It only returns the doorbell address so that it gets >>>> communicated to the downstream devices. >>>> >>>> So how does the RC know which address is special, so it can trigger on >>>> inbound writes hitting that address and assert the SPI ? >>> >>> The CPU address of the MSI doorbell address is 0x2e07c >>> i.e. within the reg space of the PCIe controller block. >> >> Which you describe in DT already, right? So why aren't you using an >> offset in this region as your MSI ddorbell (potentially applying an >> offset, see below)? >> >> >>> As I discussed back in March, the RC implements an odd >>> bus-to-system mapping. >>> >>> RC BAR0 defines a window in PCI address space (max 1GB). >>> Accesses outside this window are silently ignored. >>> The window is divided into 8 "regions" and there are 8 >>> registers defining the offset into CPU space. >>> >>> In pseudo code, assuming pci_address is within the >>> window defined by BAR0: >>> >>> cpu_address map_bus_to_system(pci_address) >>> { >>> temp = pci_address - BAR0.base >>> region = temp / region_size >>> offset = temp % region_size >>> cpu_address = region_reg[region] + offset >>> return cpu_address >>> } >>> >>> The current setup is: >>> >>> DRAM at 0x80000000-0xa0000000 >>> BAR0.base = 0x80000000 >>> REGION[0] = 0x80000000 >>> REGION[1] = 0x88000000 >>> REGION[2] = 0x90000000 >>> REGION[3] = 0x98000000 >>> REGION[4] = 0x0 >>> >>> (This map means 1:1 identity for DRAM addresses.) >>> >>> Thus when a device writes to 0xa002e07c (region 4) >>> the write is forwarded to 0x2e07c. >> >> But how do you find out about the 0xa0000000 offset? You must make sure >> that the provided address is outside of RAM, should you end-up on a >> system more than 1GB of RAM. 
> > Robin wrote a prophetic post back in March: > http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html > >> The appropriate DT property would be "dma-ranges", i.e. >> >> pci@... { >> ... >> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >> } > > The dma-ranges property seems to be exactly what I'm looking for: > > Restrict DMA to the first X MB of RAM (use a bounce buffer > for other physical addresses). > > I added the following property to my PCIe node > > dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; > > with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ > > But it does not work. Arg! > > My PCIe controller driver seems to be correctly calling of_dma_get_range: > > [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) > [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) > [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) > [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) > [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) > [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) > [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) > [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) > [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) > [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) > > of_dma_get_range() is called on the pcie node (which is expected) > but after parsing n_addr_cells and n_size_cells in the while loop, > the code jumps to the parent node ("soc")... while my property is > attached to the pcie node... 
This is not your driver calling of_dma_get_range(), this is the PCI core doing so in the act of DMA master configuration for a discovered *endpoint*. The fact that the "pass the host controller's OF node because we don't have one for the endpoint" bodge only works properly for dma-coherent and not dma-ranges is a known, but irrelevant, problem. If your host controller driver needs to discover its windows from DT to configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, pcie-rcar, pcie-xgene, etc. for examples. Robin. > > [ 0.507754] of_dma_get_range: node=dfbf74cc np=dfbf74cc name=/soc/pcie@2e000 name2=/soc/pcie@2e000 > ... > [ 0.509162] __of_find_property: node=soc find=#address-cells prop=compatible > [ 0.509168] __of_find_property: node=soc find=#address-cells prop=interrupt-parent > [ 0.509173] __of_find_property: node=soc find=#address-cells prop=#address-cells > [ 0.509178] __of_find_property: node=soc find=#size-cells prop=compatible > [ 0.509182] __of_find_property: node=soc find=#size-cells prop=interrupt-parent > [ 0.509186] __of_find_property: node=soc find=#size-cells prop=#address-cells > [ 0.509190] __of_find_property: node=soc find=#size-cells prop=#size-cells > [ 0.509195] __of_find_property: node=soc find=dma-ranges prop=compatible > [ 0.509199] __of_find_property: node=soc find=dma-ranges prop=interrupt-parent > [ 0.509203] __of_find_property: node=soc find=dma-ranges prop=#address-cells > [ 0.509207] __of_find_property: node=soc find=dma-ranges prop=#size-cells > [ 0.509211] __of_find_property: node=soc find=dma-ranges prop=ranges > [ 0.509215] __of_find_property: node=soc find=dma-ranges prop=name > [ 0.509219] dma-ranges= (null) > > http://elixir.free-electrons.com/linux/latest/source/drivers/of/address.c#L838 > > What am I missing? > > Regards. >
On 25/08/2017 17:25, Robin Murphy wrote: > On 25/08/17 16:01, Mason wrote: > >> Robin wrote a prophetic post back in March: >> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html >> >>> The appropriate DT property would be "dma-ranges", i.e. >>> >>> pci@... { >>> ... >>> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >>> } >> >> The dma-ranges property seems to be exactly what I'm looking for: >> >> Restrict DMA to the first X MB of RAM (use a bounce buffer >> for other physical addresses). >> >> I added the following property to my PCIe node >> >> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >> >> with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ >> >> But it does not work. Arg! >> >> My PCIe controller driver seems to be correctly calling of_dma_get_range: >> >> [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) >> [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) >> [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) >> [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) >> [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) >> [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) >> [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) >> [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) >> [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) >> [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) >> >> of_dma_get_range() is called on the pcie node (which is expected) >> but after parsing n_addr_cells and n_size_cells in the while loop, >> the code jumps to the parent node 
("soc")... while my property is >> attached to the pcie node... > > This is not your driver calling of_dma_get_range(), this is the PCI core > doing so in the act of DMA master configuration for a discovered > *endpoint*. The fact that the "pass the host controller's OF node > because we don't have one for the endpoint" bodge only works properly > for dma-coherent and not dma-ranges is a known, but irrelevant, problem. > > If your host controller driver needs to discover its windows from DT to > configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, > pcie-racar, pcie-xgene, etc. for examples. Yes, I'm aware that I need to do my own parsing of dma-ranges. I can use that information to configure BAR0.base and the region registers. But Linux needs to record my settings at some point, right? Otherwise, how does the DMA framework know that devices can only reach cpu addresses [0x80000000, 0xa0000000[ and when to use bounce buffers? What's preventing the XHCI driver from allocating memory outside of my "safe" range, and having the DMA framework blindly map that? Regards.
On 25/08/17 16:35, Mason wrote: > On 25/08/2017 17:25, Robin Murphy wrote: > >> On 25/08/17 16:01, Mason wrote: >> >>> Robin wrote a prophetic post back in March: >>> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html >>> >>>> The appropriate DT property would be "dma-ranges", i.e. >>>> >>>> pci@... { >>>> ... >>>> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >>>> } >>> >>> The dma-ranges property seems to be exactly what I'm looking for: >>> >>> Restrict DMA to the first X MB of RAM (use a bounce buffer >>> for other physical addresses). >>> >>> I added the following property to my PCIe node >>> >>> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >>> >>> with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ >>> >>> But it does not work. Arg! >>> >>> My PCIe controller driver seems to be correctly calling of_dma_get_range: >>> >>> [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) >>> [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) >>> [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) >>> [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) >>> [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) >>> [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) >>> [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) >>> [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) >>> [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) >>> [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) >>> >>> of_dma_get_range() is called on the pcie node (which is expected) >>> but after parsing n_addr_cells and 
n_size_cells in the while loop, >>> the code jumps to the parent node ("soc")... while my property is >>> attached to the pcie node... >> >> This is not your driver calling of_dma_get_range(), this is the PCI core >> doing so in the act of DMA master configuration for a discovered >> *endpoint*. The fact that the "pass the host controller's OF node >> because we don't have one for the endpoint" bodge only works properly >> for dma-coherent and not dma-ranges is a known, but irrelevant, problem. >> >> If your host controller driver needs to discover its windows from DT to >> configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, >> pcie-rcar, pcie-xgene, etc. for examples. > > Yes, I'm aware that I need to do my own parsing of dma-ranges. > I can use that information to configure BAR0.base and the > region registers. > > But Linux needs to record my settings at some point, right? > Otherwise, how does the DMA framework know that devices can > only reach cpu addresses [0x80000000, 0xa0000000[ and when > to use bounce buffers? > > What's preventing the XHCI driver from allocating memory > outside of my "safe" range, and having the DMA framework > blindly map that? At the moment, nothing. Systems that have physical memory that is not visible in PCI mem space are having a bad time and will not go to space today. But that bears no relation to your MSI controller getting its doorbell address set appropriately. Robin.
On 25/08/2017 17:45, Robin Murphy wrote: > On 25/08/17 16:35, Mason wrote: > >> On 25/08/2017 17:25, Robin Murphy wrote: >> >>> On 25/08/17 16:01, Mason wrote: >>> >>>> Robin wrote a prophetic post back in March: >>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html >>>> >>>>> The appropriate DT property would be "dma-ranges", i.e. >>>>> >>>>> pci@... { >>>>> ... >>>>> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >>>>> } >>>> >>>> The dma-ranges property seems to be exactly what I'm looking for: >>>> >>>> Restrict DMA to the first X MB of RAM (use a bounce buffer >>>> for other physical addresses). >>>> >>>> I added the following property to my PCIe node >>>> >>>> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >>>> >>>> with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ >>>> >>>> But it does not work. Arg! >>>> >>>> My PCIe controller driver seems to be correctly calling of_dma_get_range: >>>> >>>> [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) >>>> [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) >>>> [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) >>>> [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) >>>> [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) >>>> [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) >>>> [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) >>>> [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) >>>> [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) >>>> [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) >>>> >>>> of_dma_get_range() 
is called on the pcie node (which is expected) >>>> but after parsing n_addr_cells and n_size_cells in the while loop, >>>> the code jumps to the parent node ("soc")... while my property is >>>> attached to the pcie node... >>> >>> This is not your driver calling of_dma_get_range(), this is the PCI core >>> doing so in the act of DMA master configuration for a discovered >>> *endpoint*. The fact that the "pass the host controller's OF node >>> because we don't have one for the endpoint" bodge only works properly >>> for dma-coherent and not dma-ranges is a known, but irrelevant, problem. >>> >>> If your host controller driver needs to discover its windows from DT to >>> configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, >>> pcie-racar, pcie-xgene, etc. for examples. >> >> Yes, I'm aware that I need to do my own parsing of dma-ranges. >> I can use that information to configure BAR0.base and the >> region registers. >> >> But Linux needs to record my settings at some point, right? >> Otherwise, how does the DMA framework know that devices can >> only reach cpu addresses [0x80000000, 0xa0000000[ and when >> to use bounce buffers? >> >> What's preventing the XHCI driver from allocating memory >> outside of my "safe" range, and having the DMA framework >> blindly map that? > > At the moment, nothing. Systems that have physical memory that is not > visible in PCI mem space are having a bad time and will not go to space > today. > > But that bears no relation to your MSI controller getting its doorbell > address set appropriately. OK, so this is what I propose for v11 in order to not hard code the MSI doorbell address (e.g. 0xa002e07c) I add the following property to the pcie node: dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; I.e. pci_addr = 0x80000000, cpu_addr = 0x80000000, len=0x20000000 Then in the PCIe driver, I parse dma-ranges. 
Consequently MSI_doorbell_addr = cpu_addr + len + res.start + 0x7c Bjorn, Marc, Robin, is that an acceptable solution? Tangent: Robin, for my own education, how does one configure the DMA framework to use bounce buffers for certain addresses? Regards.
On Fri, Aug 25 2017 at 6:44:27 pm BST, Mason <slash.tmp@free.fr> wrote: > On 25/08/2017 17:45, Robin Murphy wrote: > >> On 25/08/17 16:35, Mason wrote: >> >>> On 25/08/2017 17:25, Robin Murphy wrote: >>> >>>> On 25/08/17 16:01, Mason wrote: >>>> >>>>> Robin wrote a prophetic post back in March: >>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html >>>>> >>>>>> The appropriate DT property would be "dma-ranges", i.e. >>>>>> >>>>>> pci@... { >>>>>> ... >>>>>> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >>>>>> } >>>>> >>>>> The dma-ranges property seems to be exactly what I'm looking for: >>>>> >>>>> Restrict DMA to the first X MB of RAM (use a bounce buffer >>>>> for other physical addresses). >>>>> >>>>> I added the following property to my PCIe node >>>>> >>>>> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >>>>> >>>>> with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ >>>>> >>>>> But it does not work. Arg! >>>>> >>>>> My PCIe controller driver seems to be correctly calling of_dma_get_range: >>>>> >>>>> [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] >>>>> (of_dma_configure+0x48/0x234) >>>>> [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] >>>>> (pci_device_add+0xac/0x350) >>>>> [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] >>>>> (pci_scan_single_device+0x90/0xb0) >>>>> [ 0.520501] [<c02fa488>] (pci_scan_single_device) from >>>>> [<c02fa500>] (pci_scan_slot+0x58/0x100) >>>>> [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] >>>>> (pci_scan_child_bus+0x20/0xf8) >>>>> [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] >>>>> (pci_scan_root_bus_msi+0xcc/0xd8) >>>>> [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from >>>>> [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) >>>>> [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] >>>>> (pci_host_common_probe+0xc8/0x314) >>>>> [ 0.520546] [<c0310544>] (pci_host_common_probe) from >>>>> 
[<c0310ce8>] (tango_pcie_probe+0x148/0x350) >>>>> [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] >>>>> (platform_drv_probe+0x34/0x6c) >>>>> >>>>> of_dma_get_range() is called on the pcie node (which is expected) >>>>> but after parsing n_addr_cells and n_size_cells in the while loop, >>>>> the code jumps to the parent node ("soc")... while my property is >>>>> attached to the pcie node... >>>> >>>> This is not your driver calling of_dma_get_range(), this is the PCI core >>>> doing so in the act of DMA master configuration for a discovered >>>> *endpoint*. The fact that the "pass the host controller's OF node >>>> because we don't have one for the endpoint" bodge only works properly >>>> for dma-coherent and not dma-ranges is a known, but irrelevant, problem. >>>> >>>> If your host controller driver needs to discover its windows from DT to >>>> configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, >>>> pcie-racar, pcie-xgene, etc. for examples. >>> >>> Yes, I'm aware that I need to do my own parsing of dma-ranges. >>> I can use that information to configure BAR0.base and the >>> region registers. >>> >>> But Linux needs to record my settings at some point, right? >>> Otherwise, how does the DMA framework know that devices can >>> only reach cpu addresses [0x80000000, 0xa0000000[ and when >>> to use bounce buffers? >>> >>> What's preventing the XHCI driver from allocating memory >>> outside of my "safe" range, and having the DMA framework >>> blindly map that? >> >> At the moment, nothing. Systems that have physical memory that is not >> visible in PCI mem space are having a bad time and will not go to space >> today. >> >> But that bears no relation to your MSI controller getting its doorbell >> address set appropriately. > > OK, so this is what I propose for v11 in order to not > hard code the MSI doorbell address (e.g. 
0xa002e07c) > > I add the following property to the pcie node: > > dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; > > I.e. pci_addr = 0x80000000, cpu_addr = 0x80000000, len=0x20000000 > > Then in the PCIe driver, I parse dma-ranges. > > Consequently > > MSI_doorbell_addr = cpu_addr + len + res.start + 0x7c > > Bjorn, Marc, Robin, is that an acceptable solution? It seems to work, but I still have my doubts about this BAR0.base and the associated regions. Are these regions so hardcoded in HW that the RC cannot DMA outside of this 1GB region? Or can it be reconfigured by some SW agent to cover more RAM, should someone decide that 1GB is on the "too little" side? If the former is true, the HW is remarkably busted and/or inflexible. If the latter is true, then the dma-ranges property feels very fragile, as it must be kept in sync with the amount of memory that the system has. M.
On 26/08/2017 15:08, Marc Zyngier wrote: > On Aug 25 2017 at 18:44, Mason wrote: > >> On 25/08/2017 17:45, Robin Murphy wrote: >> >>> On 25/08/17 16:35, Mason wrote: >>> >>>> On 25/08/2017 17:25, Robin Murphy wrote: >>>> >>>>> On 25/08/17 16:01, Mason wrote: >>>>> >>>>>> Robin wrote a prophetic post back in March: >>>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/492965.html >>>>>> >>>>>>> The appropriate DT property would be "dma-ranges", i.e. >>>>>>> >>>>>>> pci@... { >>>>>>> ... >>>>>>> dma-ranges = <(PCI bus address) (CPU phys address) (size)>; >>>>>>> } >>>>>> >>>>>> The dma-ranges property seems to be exactly what I'm looking for: >>>>>> >>>>>> Restrict DMA to the first X MB of RAM (use a bounce buffer >>>>>> for other physical addresses). >>>>>> >>>>>> I added the following property to my PCIe node >>>>>> >>>>>> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >>>>>> >>>>>> with the intent to create a 1:1 mapping for [0x80000000, 0xa0000000[ >>>>>> >>>>>> But it does not work. Arg! 
>>>>>> >>>>>> My PCIe controller driver seems to be correctly calling of_dma_get_range: >>>>>> >>>>>> [ 0.520469] [<c03d85e8>] (of_dma_get_range) from [<c03d5ad8>] (of_dma_configure+0x48/0x234) >>>>>> [ 0.520483] [<c03d5ad8>] (of_dma_configure) from [<c02fa154>] (pci_device_add+0xac/0x350) >>>>>> [ 0.520493] [<c02fa154>] (pci_device_add) from [<c02fa488>] (pci_scan_single_device+0x90/0xb0) >>>>>> [ 0.520501] [<c02fa488>] (pci_scan_single_device) from [<c02fa500>] (pci_scan_slot+0x58/0x100) >>>>>> [ 0.520510] [<c02fa500>] (pci_scan_slot) from [<c02fb418>] (pci_scan_child_bus+0x20/0xf8) >>>>>> [ 0.520519] [<c02fb418>] (pci_scan_child_bus) from [<c02fb6e8>] (pci_scan_root_bus_msi+0xcc/0xd8) >>>>>> [ 0.520527] [<c02fb6e8>] (pci_scan_root_bus_msi) from [<c02fb70c>] (pci_scan_root_bus+0x18/0x20) >>>>>> [ 0.520537] [<c02fb70c>] (pci_scan_root_bus) from [<c0310544>] (pci_host_common_probe+0xc8/0x314) >>>>>> [ 0.520546] [<c0310544>] (pci_host_common_probe) from [<c0310ce8>] (tango_pcie_probe+0x148/0x350) >>>>>> [ 0.520557] [<c0310ce8>] (tango_pcie_probe) from [<c034d398>] (platform_drv_probe+0x34/0x6c) >>>>>> >>>>>> of_dma_get_range() is called on the pcie node (which is expected) >>>>>> but after parsing n_addr_cells and n_size_cells in the while loop, >>>>>> the code jumps to the parent node ("soc")... while my property is >>>>>> attached to the pcie node... >>>>> >>>>> This is not your driver calling of_dma_get_range(), this is the PCI core >>>>> doing so in the act of DMA master configuration for a discovered >>>>> *endpoint*. The fact that the "pass the host controller's OF node >>>>> because we don't have one for the endpoint" bodge only works properly >>>>> for dma-coherent and not dma-ranges is a known, but irrelevant, problem. >>>>> >>>>> If your host controller driver needs to discover its windows from DT to >>>>> configure *itself*, it needs to parse dma-ranges itself; see pcie-iproc, >>>>> pcie-rcar, pcie-xgene, etc. for examples.
>>>> >>>> Yes, I'm aware that I need to do my own parsing of dma-ranges. >>>> I can use that information to configure BAR0.base and the >>>> region registers. >>>> >>>> But Linux needs to record my settings at some point, right? >>>> Otherwise, how does the DMA framework know that devices can >>>> only reach cpu addresses [0x80000000, 0xa0000000[ and when >>>> to use bounce buffers? >>>> >>>> What's preventing the XHCI driver from allocating memory >>>> outside of my "safe" range, and having the DMA framework >>>> blindly map that? >>> >>> At the moment, nothing. Systems that have physical memory that is not >>> visible in PCI mem space are having a bad time and will not go to space >>> today. >>> >>> But that bears no relation to your MSI controller getting its doorbell >>> address set appropriately. >> >> OK, so this is what I propose for v11 in order to not >> hard code the MSI doorbell address (e.g. 0xa002e07c) >> >> I add the following property to the pcie node: >> >> dma-ranges = <0x0 0x80000000 0x80000000 0x20000000>; >> >> I.e. pci_addr = 0x80000000, cpu_addr = 0x80000000, len=0x20000000 >> >> Then in the PCIe driver, I parse dma-ranges. >> >> Consequently >> >> MSI_doorbell_addr = cpu_addr + len + res.start + 0x7c >> >> Bjorn, Marc, Robin, is that an acceptable solution? > > It seems to work, but I still have my doubts about this BAR0.base and > the associated regions. Are these regions so hardcoded in HW that the RC > cannot DMA outside of this 1GB region? Or can it be reconfigured by some > SW agent to cover more RAM, should someone decide that 1GB is on the > "too little" side? > > If the former is true, the HW is remarkably busted and/or inflexible. This HW block has already been deemed insane because of the muxing of mem and config space... So you're late to the party :-) I wouldn't call the regions "hard-coded" since they are software-configurable to point anywhere in the CPU bus. 
(Although I'm not sure if that's any use, since the DMA framework seems to expect a 1:1 mapping.) But the other side (PCI bus) is quite inflexible: accesses to addresses outside the window defined by BAR0 are silently ignored, no working around that :-( > If the latter is true, then the dma-ranges property feels very fragile, as > it must be kept in sync with the amount of memory that the system has. I'm confused. As I pointed out, the dma-ranges in the pcie node is ignored by the DMA framework. And Robin confirmed that "Systems that have physical memory that is not visible in PCI mem space are having a bad time and will not go to space today." So there are several setups where something is bound to break: 1) Linux manages more than 1 GB (contiguous) => because one region needs to point to the doorbell area, so 128 MB are wasted. 2) Linux manages non-contiguous memory => e.g. 128MB@0x80000000 + 128MB@0xc0000000 That's why I've asked about bounce buffers. The system I test on boots with mem=512MB Regards.
diff --git a/drivers/pci/host/pcie-tango.c b/drivers/pci/host/pcie-tango.c index 6bbb81f06a53..d672271ad719 100644 --- a/drivers/pci/host/pcie-tango.c +++ b/drivers/pci/host/pcie-tango.c @@ -1,12 +1,170 @@ +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> #include <linux/pci-ecam.h> #include <linux/delay.h> +#include <linux/msi.h> #include <linux/of.h> +#define MSI_MAX 256 + #define SMP8759_MUX 0x48 #define SMP8759_TEST_OUT 0x74 +#define SMP8759_STATUS 0x80 +#define SMP8759_ENABLE 0xa0 +#define SMP8759_DOORBELL 0xa002e07c struct tango_pcie { - void __iomem *base; + DECLARE_BITMAP(used_msi, MSI_MAX); + spinlock_t used_msi_lock; + void __iomem *base; + struct irq_domain *dom; +}; + +static void tango_msi_isr(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct tango_pcie *pcie = irq_desc_get_handler_data(desc); + unsigned long status, base, virq, idx, pos = 0; + + chained_irq_enter(chip, desc); + spin_lock(&pcie->used_msi_lock); + + while ((pos = find_next_bit(pcie->used_msi, MSI_MAX, pos)) < MSI_MAX) { + base = round_down(pos, 32); + status = readl_relaxed(pcie->base + SMP8759_STATUS + base / 8); + for_each_set_bit(idx, &status, 32) { + virq = irq_find_mapping(pcie->dom, base + idx); + generic_handle_irq(virq); + } + pos = base + 32; + } + + spin_unlock(&pcie->used_msi_lock); + chained_irq_exit(chip, desc); +} + +static void tango_ack(struct irq_data *d) +{ + struct tango_pcie *pcie = d->chip_data; + u32 offset = (d->hwirq / 32) * 4; + u32 bit = BIT(d->hwirq % 32); + + writel_relaxed(bit, pcie->base + SMP8759_STATUS + offset); +} + +static void update_msi_enable(struct irq_data *d, bool unmask) +{ + unsigned long flags; + struct tango_pcie *pcie = d->chip_data; + u32 offset = (d->hwirq / 32) * 4; + u32 bit = BIT(d->hwirq % 32); + u32 val; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + val = readl_relaxed(pcie->base + SMP8759_ENABLE + offset); + val = unmask ? 
val | bit : val & ~bit; + writel_relaxed(val, pcie->base + SMP8759_ENABLE + offset); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static void tango_mask(struct irq_data *d) +{ + update_msi_enable(d, false); +} + +static void tango_unmask(struct irq_data *d) +{ + update_msi_enable(d, true); +} + +static int tango_set_affinity(struct irq_data *d, const struct cpumask *mask, + bool force) +{ + return -EINVAL; +} + +static void tango_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + msg->address_lo = lower_32_bits(SMP8759_DOORBELL); + msg->address_hi = upper_32_bits(SMP8759_DOORBELL); + msg->data = d->hwirq; +} + +static struct irq_chip tango_chip = { + .irq_ack = tango_ack, + .irq_mask = tango_mask, + .irq_unmask = tango_unmask, + .irq_set_affinity = tango_set_affinity, + .irq_compose_msi_msg = tango_compose_msi_msg, +}; + +static void msi_ack(struct irq_data *d) +{ + irq_chip_ack_parent(d); +} + +static void msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip msi_chip = { + .name = "MSI", + .irq_ack = msi_ack, + .irq_mask = msi_mask, + .irq_unmask = msi_unmask, +}; + +static struct msi_domain_info msi_dom_info = { + .flags = MSI_FLAG_PCI_MSIX + | MSI_FLAG_USE_DEF_DOM_OPS + | MSI_FLAG_USE_DEF_CHIP_OPS, + .chip = &msi_chip, +}; + +static int tango_irq_domain_alloc(struct irq_domain *dom, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct tango_pcie *pcie = dom->host_data; + unsigned long flags; + int pos; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + pos = find_first_zero_bit(pcie->used_msi, MSI_MAX); + if (pos >= MSI_MAX) { + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + return -ENOSPC; + } + __set_bit(pos, pcie->used_msi); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + irq_domain_set_info(dom, virq, pos, &tango_chip, + pcie, 
handle_edge_irq, NULL, NULL); + + return 0; +} + +static void tango_irq_domain_free(struct irq_domain *dom, unsigned int virq, + unsigned int nr_irqs) +{ + unsigned long flags; + struct irq_data *d = irq_domain_get_irq_data(dom, virq); + struct tango_pcie *pcie = d->chip_data; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + __clear_bit(d->hwirq, pcie->used_msi); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static const struct irq_domain_ops dom_ops = { + .alloc = tango_irq_domain_alloc, + .free = tango_irq_domain_free, }; static int smp8759_config_read(struct pci_bus *bus, unsigned int devfn, @@ -76,7 +234,9 @@ static int tango_pcie_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct tango_pcie *pcie; struct resource *res; - int ret; + struct irq_domain *msi_dom, *irq_dom; + struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); + int ret, reg, virq; dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n"); add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); @@ -95,6 +255,33 @@ static int tango_pcie_probe(struct platform_device *pdev) if (!tango_pcie_link_up(pcie)) return -ENODEV; + for (reg = 0; reg < MSI_MAX / 8; reg += 4) + writel_relaxed(0, pcie->base + SMP8759_ENABLE + reg); + + virq = platform_get_irq(pdev, 1); + if (virq <= 0) { + dev_err(dev, "Failed to map IRQ\n"); + return -ENXIO; + } + + irq_dom = irq_domain_create_linear(fwnode, MSI_MAX, &dom_ops, pcie); + if (!irq_dom) { + dev_err(dev, "Failed to create IRQ domain\n"); + return -ENOMEM; + } + + msi_dom = pci_msi_create_irq_domain(fwnode, &msi_dom_info, irq_dom); + if (!msi_dom) { + dev_err(dev, "Failed to create MSI domain\n"); + irq_domain_remove(irq_dom); + return -ENOMEM; + } + + pcie->dom = irq_dom; + spin_lock_init(&pcie->used_msi_lock); + + irq_set_chained_handler_and_data(virq, tango_msi_isr, pcie); + return pci_host_common_probe(pdev, &smp8759_ecam_ops); }
The MSI controller in Tango supports 256 message-signaled interrupts and a single doorbell address. Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com> --- Changes from v9 to v10 - Start from Bjorn's cleanup branch - Clean up the MSI init and unused statements - Based on top of v4.13-rc6 Hello Bjorn, This patch is almost identical to the patch reviewed by Marc Zyngier on June 14 (10 weeks ago). I'm not sure he could review this patch again in time for 4.14 (given his workload these past few weeks). The host bridge part landed in 4.13, but the driver is useless without MSI support (legacy interrupts are not supported). Can you take it for 4.14? Regards. --- drivers/pci/host/pcie-tango.c | 191 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-)