diff mbox series

[v5,08/12] PCI: imx6: Config look up table(LUT) to support MSI ITS and IOMMU for i.MX95

Message ID 20240528-pci2_upstream-v5-8-750aa7edb8e2@nxp.com (mailing list archive)
State Superseded
Headers show
Series PCI: imx6: Fix\rename\clean up and add lut information for imx95 | expand

Commit Message

Frank Li May 28, 2024, 7:39 p.m. UTC
For the i.MX95, configuration of a LUT is necessary to convert Bus Device
Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
This involves examining the msi-map and smmu-map to ensure consistent
mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
registers are configured. In the absence of an msi-map, the built-in MSI
controller is utilized as a fallback.

Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
upon the appearance of a new PCI device and when the bus is an iMX6 PCI
controller. This function configures the correct LUT based on Device Tree
Settings (DTS).

Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
 drivers/pci/controller/dwc/pci-imx6.c | 175 +++++++++++++++++++++++++++++++++-
 1 file changed, 174 insertions(+), 1 deletion(-)

Comments

Bjorn Helgaas May 30, 2024, 11:08 p.m. UTC | #1
[+cc IOMMU and pcie-apple.c folks for comment]

On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> This involves examining the msi-map and smmu-map to ensure consistent
> mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> registers are configured. In the absence of an msi-map, the built-in MSI
> controller is utilized as a fallback.
> 
> Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> controller. This function configures the correct LUT based on Device Tree
> Settings (DTS).

This scheme is pretty similar to apple_pcie_bus_notifier().  If we
have to do this, I wish it were *more* similar, i.e., copy the
function names, bitmap tracking, code structure, etc.

I don't really know how stream IDs work, but I assume they are used on
most or all arm64 platforms, so I'm a little surprised that of all the
PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
this notifier.  

There's this path, which is pretty generic and does at least the
of_map_id() part of what you're doing in imx_pcie_add_device():

    __driver_probe_device
      really_probe
        pci_dma_configure                       # pci_bus_type.dma_configure
          of_dma_configure
            of_dma_configure_id
              of_iommu_configure
                of_pci_iommu_init
                  of_iommu_configure_dev_id
                    of_map_id
                    of_iommu_xlate
                      ops = iommu_ops_from_fwnode
                      iommu_fwspec_init
                      ops->of_xlate(dev, iommu_spec)

Maybe this needs to be extended somehow with a hook to do the
device-specific work like updating the LUT?  Just speculating here,
the IOMMU folks will know how this is expected to work.

Some typos and minor comments below.

> Signed-off-by: Frank Li <Frank.Li@nxp.com>
> ---
>  drivers/pci/controller/dwc/pci-imx6.c | 175 +++++++++++++++++++++++++++++++++-
>  1 file changed, 174 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 29309ad0e352b..8ecc00049e20b 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -54,6 +54,22 @@
>  #define IMX95_PE0_GEN_CTRL_3			0x1058
>  #define IMX95_PCIE_LTSSM_EN			BIT(0)
>  
> +#define IMX95_PE0_LUT_ACSCTRL			0x1008
> +#define IMX95_PEO_LUT_RWA			BIT(16)
> +#define IMX95_PE0_LUT_ENLOC			GENMASK(4, 0)
> +
> +#define IMX95_PE0_LUT_DATA1			0x100c
> +#define IMX95_PE0_LUT_VLD			BIT(31)
> +#define IMX95_PE0_LUT_DAC_ID			GENMASK(10, 8)
> +#define IMX95_PE0_LUT_STREAM_ID			GENMASK(5, 0)
> +
> +#define IMX95_PE0_LUT_DATA2			0x1010
> +#define IMX95_PE0_LUT_REQID			GENMASK(31, 16)
> +#define IMX95_PE0_LUT_MASK			GENMASK(15, 0)
> +
> +#define IMX95_SID_MASK				GENMASK(5, 0)
> +#define IMX95_MAX_LUT				32
> +
>  #define to_imx_pcie(x)	dev_get_drvdata((x)->dev)
>  
>  enum imx_pcie_variants {
> @@ -79,6 +95,7 @@ enum imx_pcie_variants {
>  #define IMX_PCIE_FLAG_HAS_PHY_RESET		BIT(5)
>  #define IMX_PCIE_FLAG_HAS_SERDES		BIT(6)
>  #define IMX_PCIE_FLAG_SUPPORT_64BIT		BIT(7)
> +#define IMX_PCIE_FLAG_MONITOR_DEV		BIT(8)
>  
>  #define imx_check_flag(pci, val)     (pci->drvdata->flags & val)
>  
> @@ -132,6 +149,8 @@ struct imx_pcie {
>  	struct device		*pd_pcie_phy;
>  	struct phy		*phy;
>  	const struct imx_pcie_drvdata *drvdata;
> +
> +	struct mutex		lock;
>  };
>  
>  /* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
> @@ -215,6 +234,66 @@ static int imx95_pcie_init_phy(struct imx_pcie *imx_pcie)
>  	return 0;
>  }
>  
> +static int imx_pcie_config_lut(struct imx_pcie *imx_pcie, u16 reqid, u8 sid)
> +{
> +	struct dw_pcie *pci = imx_pcie->pci;
> +	struct device *dev = pci->dev;
> +	u32 data1, data2;
> +	int i;
> +
> +	if (sid >= 64) {
> +		dev_err(dev, "Invalid SID for index %d\n", sid);
> +		return -EINVAL;
> +	}
> +
> +	guard(mutex)(&imx_pcie->lock);
> +
> +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> +
> +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1);
> +		if (data1 & IMX95_PE0_LUT_VLD)
> +			continue;
> +
> +		data1 = FIELD_PREP(IMX95_PE0_LUT_DAC_ID, 0);
> +		data1 |= FIELD_PREP(IMX95_PE0_LUT_STREAM_ID, sid);
> +		data1 |= IMX95_PE0_LUT_VLD;
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1);
> +
> +		data2 = 0xffff;
> +		data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, reqid);
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2);
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> +
> +		return 0;
> +	}
> +
> +	dev_err(dev, "All lut already used\n");
> +	return -EINVAL;
> +}
> +
> +static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 reqid)
> +{
> +	u32 data2 = 0;
> +	int i;
> +
> +	guard(mutex)(&imx_pcie->lock);
> +
> +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> +
> +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2);
> +		if (FIELD_GET(IMX95_PE0_LUT_REQID, data2) == reqid) {
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, 0);
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, 0);
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> +		}
> +	}
> +}
> +
>  static void imx_pcie_configure_type(struct imx_pcie *imx_pcie)
>  {
>  	const struct imx_pcie_drvdata *drvdata = imx_pcie->drvdata;
> @@ -1232,6 +1311,85 @@ static int imx_pcie_resume_noirq(struct device *dev)
>  	return 0;
>  }
>  
> +static bool imx_pcie_match_device(struct pci_bus *bus);

Can you add the imx_pcie_match_device() earlier in the file so we
don't need this forward declaration?

> +static int imx_pcie_add_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> +{
> +	u32 sid_i = 0, sid_m = 0, rid = pci_dev_id(pdev);
> +	struct device *dev = imx_pcie->pci->dev;
> +	int err;
> +
> +	err = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", NULL, &sid_i);
> +	if (err)
> +		return err;
> +
> +	err = of_map_id(dev->of_node, rid, "msi-map", "msi-map-mask", NULL, &sid_m);
> +	if (err)
> +		return err;
> +
> +	if (sid_i != rid && sid_m != rid)
> +		if ((sid_i & IMX95_SID_MASK) != (sid_m & IMX95_SID_MASK)) {
> +			dev_err(dev, "its and iommu stream id miss match, please check dts file\n");
> +			return -EINVAL;
> +		}
> +
> +	/* if iommu-map is not existed then use msi-map's stream id*/

Capitalize consistently, e.g., the most comments in this file start
with a capital letter.

s/is not existed/does not exist/

Add space before closing */

> +	if (sid_i == rid)
> +		sid_i = sid_m;
> +
> +	sid_i &= IMX95_SID_MASK;
> +
> +	if (sid_i != rid)
> +		return imx_pcie_config_lut(imx_pcie, rid, sid_i);
> +
> +	/* Use dwc built-in MSI controller */
> +	return 0;
> +}
> +
> +static void imx_pcie_del_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> +{
> +	imx_pcie_remove_lut(imx_pcie, pci_dev_id(pdev));
> +}
> +
> +
> +static int imx_pcie_bus_notifier(struct notifier_block *nb, unsigned long action, void *data)
> +{
> +	struct pci_host_bridge *host;
> +	struct imx_pcie *imx_pcie;
> +	struct pci_dev *pdev;
> +	int err;
> +
> +	pdev = to_pci_dev(data);
> +	host = pci_find_host_bridge(pdev->bus);
> +
> +	if (!imx_pcie_match_device(host->bus))
> +		return NOTIFY_OK;
> +
> +	imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(host->sysdata));
> +
> +	if (!imx_check_flag(imx_pcie, IMX_PCIE_FLAG_MONITOR_DEV))
> +		return NOTIFY_OK;
> +
> +	switch (action) {
> +	case BUS_NOTIFY_ADD_DEVICE:
> +		err = imx_pcie_add_device(imx_pcie, pdev);
> +		if (err)
> +			return notifier_from_errno(err);
> +		break;
> +	case BUS_NOTIFY_DEL_DEVICE:
> +		imx_pcie_del_device(imx_pcie, pdev);
> +		break;
> +	default:
> +		return NOTIFY_DONE;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block imx_pcie_nb = {
> +	.notifier_call = imx_pcie_bus_notifier,
> +};
> +
>  static const struct dev_pm_ops imx_pcie_pm_ops = {
>  	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_pcie_suspend_noirq,
>  				  imx_pcie_resume_noirq)
> @@ -1264,6 +1422,8 @@ static int imx_pcie_probe(struct platform_device *pdev)
>  	imx_pcie->pci = pci;
>  	imx_pcie->drvdata = of_device_get_match_data(dev);
>  
> +	mutex_init(&imx_pcie->lock);
> +
>  	/* Find the PHY if one is defined, only imx7d uses it */
>  	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
>  	if (np) {
> @@ -1551,7 +1711,8 @@ static const struct imx_pcie_drvdata drvdata[] = {
>  	},
>  	[IMX95] = {
>  		.variant = IMX95,
> -		.flags = IMX_PCIE_FLAG_HAS_SERDES,
> +		.flags = IMX_PCIE_FLAG_HAS_SERDES |
> +			 IMX_PCIE_FLAG_MONITOR_DEV,
>  		.clk_names = imx8mq_clks,
>  		.clks_cnt = ARRAY_SIZE(imx8mq_clks),
>  		.ltssm_off = IMX95_PE0_GEN_CTRL_3,
> @@ -1687,6 +1848,8 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
>  
>  static int __init imx_pcie_init(void)
>  {
> +	int ret;
> +
>  #ifdef CONFIG_ARM
>  	struct device_node *np;
>  
> @@ -1705,7 +1868,17 @@ static int __init imx_pcie_init(void)
>  	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
>  			"external abort on non-linefetch");
>  #endif
> +	ret = bus_register_notifier(&pci_bus_type, &imx_pcie_nb);
> +	if (ret)
> +		return ret;

I think this should go in imx6_pcie_probe().

>  	return platform_driver_register(&imx_pcie_driver);
>  }
> +
> +static void __exit imx_pcie_exit(void)
> +{
> +	bus_unregister_notifier(&pci_bus_type, &imx_pcie_nb);

It looks like this driver is removable?

What happens when an external abort occurs after the
imx6q_pcie_abort_handler() text is removed?

> +}
> +
>  device_initcall(imx_pcie_init);
> +__exitcall(imx_pcie_exit);
> 
> -- 
> 2.34.1
>
Marc Zyngier May 31, 2024, 1:52 p.m. UTC | #2
On Fri, 31 May 2024 00:08:32 +0100,
Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> [+cc IOMMU and pcie-apple.c folks for comment]
> 
> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > This involves examining the msi-map and smmu-map to ensure consistent
> > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > registers are configured. In the absence of an msi-map, the built-in MSI
> > controller is utilized as a fallback.
> > 
> > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > controller. This function configures the correct LUT based on Device Tree
> > Settings (DTS).
> 
> This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> have to do this, I wish it were *more* similar, i.e., copy the
> function names, bitmap tracking, code structure, etc.
> 
> I don't really know how stream IDs work, but I assume they are used on
> most or all arm64 platforms, so I'm a little surprised that of all the
> PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> this notifier.

That's because on sane platforms, PCI's RID and the IOMMU's SID are
the same thing, and there is no need to do anything bizarre. In the
worse case, there is a static transformation that can be applied (IORT
for ACPI, or iommu-map for DT).

Some "creative" systems such as the Apple stuff require some extra
side-band configuration because they don't know what a RID is at all
(the RID isn't conveyed to the IOMMU), nor is there a static (baked in
HW) transformation between RID and SID.

So there is a widget on the side that performs the conversion, and
this widget needs to be programmed. The way it works is that the
driver parses the iommu-map to find the expected and arbitrary) SID
on the IOMMU side for a given RID, and programs the association in the
RID-to-SID mapper. It does so at device probe time in order to make
sure the widget is alive (it seems to be part of the port power
domain).

Yes, this is a terrible hack, and I wish we didn't have to deal with
this sort of crap.

>
> There's this path, which is pretty generic and does at least the
> of_map_id() part of what you're doing in imx_pcie_add_device():
> 
>     __driver_probe_device
>       really_probe
>         pci_dma_configure                       # pci_bus_type.dma_configure
>           of_dma_configure
>             of_dma_configure_id
>               of_iommu_configure
>                 of_pci_iommu_init
>                   of_iommu_configure_dev_id
>                     of_map_id
>                     of_iommu_xlate
>                       ops = iommu_ops_from_fwnode
>                       iommu_fwspec_init
>                       ops->of_xlate(dev, iommu_spec)
> 
> Maybe this needs to be extended somehow with a hook to do the
> device-specific work like updating the LUT?  Just speculating here,
> the IOMMU folks will know how this is expected to work.

That'd be a possibility. But this would be adding extra complexity to
the IOMMU core, and I'm not sure it is worth it given that these
systems are thankfully "rare".

It is also something that is conceptually part of the PCIe root port,
and not really the IOMMU, so I'm a bit reluctant to shove things
there. In any case, it would still require callbacks into the PCIe
host driver, and I have the feeling that we'd be reinventing the bus
notifier wheel, only with pointier angles...

Thanks,

	M.
Robin Murphy May 31, 2024, 2:58 p.m. UTC | #3
On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> [+cc IOMMU and pcie-apple.c folks for comment]
> 
> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
>> For the i.MX95, configuration of a LUT is necessary to convert Bus Device
>> Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
>> This involves examining the msi-map and smmu-map to ensure consistent
>> mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
>> registers are configured. In the absence of an msi-map, the built-in MSI
>> controller is utilized as a fallback.
>>
>> Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
>> upon the appearance of a new PCI device and when the bus is an iMX6 PCI
>> controller. This function configures the correct LUT based on Device Tree
>> Settings (DTS).
> 
> This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> have to do this, I wish it were *more* similar, i.e., copy the
> function names, bitmap tracking, code structure, etc.
> 
> I don't really know how stream IDs work, but I assume they are used on
> most or all arm64 platforms, so I'm a little surprised that of all the
> PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> this notifier.

This is one of those things that's mostly at the mercy of the PCIe root 
complex implementation. Typically the SMMU StreamID and/or GIC ITS 
DeviceID is derived directly from the PCI RID, sometimes with additional 
high-order bits hard-wired to disambiguate PCI segments. I believe this 
RID-translation LUT is a particular feature of the the Synopsys IP - I 
know there's also one on the NXP Layerscape platforms, but on those it's 
programmed by the bootloader, which also generates the appropriate 
"msi-map" and "iommu-map" properties to match. Ideally that's what i.MX 
should do as well, but hey.

> There's this path, which is pretty generic and does at least the
> of_map_id() part of what you're doing in imx_pcie_add_device():
> 
>      __driver_probe_device
>        really_probe
>          pci_dma_configure                       # pci_bus_type.dma_configure
>            of_dma_configure
>              of_dma_configure_id
>                of_iommu_configure
>                  of_pci_iommu_init
>                    of_iommu_configure_dev_id
>                      of_map_id
>                      of_iommu_xlate
>                        ops = iommu_ops_from_fwnode
>                        iommu_fwspec_init
>                        ops->of_xlate(dev, iommu_spec)
> 
> Maybe this needs to be extended somehow with a hook to do the
> device-specific work like updating the LUT?  Just speculating here,
> the IOMMU folks will know how this is expected to work.

Note that that particular code path has fundamental issues and much of 
it needs to go away (I'm working on it, but it's a rich ~8-year-old pile 
of technical debt...). IOMMU configuration needs to be happening at 
device_add() time via the IOMMU layer's own bus notifier.

If it's really necessary to do this programming from Linux, then there's 
still no point in it being dynamic - the mappings cannot ever change, 
since the rest of the kernel believes that what the DT said at boot time 
was already a property of the hardware. It would be a lot more logical, 
and likely simpler, for the driver to just read the relevant map 
property and program the entire LUT to match, all in one go at 
controller probe time. Rather like what's already commonly done with the 
parsing of "dma-ranges" to program address-translation LUTs for inbound 
windows.

Plus that would also give a chance of safely dealing with bad DTs 
specifying invalid ID mappings (by refusing to probe at all). As it is, 
returning an error from a child's BUS_NOTIFY_ADD_DEVICE does nothing 
except prevent any further notifiers from running at that point - the 
device will still be added, allowed to bind a driver, and able to start 
sending DMA/MSI traffic without the controller being correctly 
programmed, which at best won't work and at worst may break the whole 
system.

Thanks,
Robin.
Frank Li May 31, 2024, 4:14 p.m. UTC | #4
On Thu, May 30, 2024 at 06:08:32PM -0500, Bjorn Helgaas wrote:
> [+cc IOMMU and pcie-apple.c folks for comment]
> 
> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > This involves examining the msi-map and smmu-map to ensure consistent
> > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > registers are configured. In the absence of an msi-map, the built-in MSI
> > controller is utilized as a fallback.
> > 
> > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > controller. This function configures the correct LUT based on Device Tree
> > Settings (DTS).
> 
> This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> have to do this, I wish it were *more* similar, i.e., copy the
> function names, bitmap tracking, code structure, etc.

Actually, I refer apple_pcie_bus_notifier(). I can't direct use apple's
implement because in imx95 have difference PCI host controller, another one
is PCI ECAM netc controller. At lease function name should be similar with
apple. 

> 
> I don't really know how stream IDs work, but I assume they are used on
> most or all arm64 platforms, so I'm a little surprised that of all the
> PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> this notifier.  
> 
> There's this path, which is pretty generic and does at least the
> of_map_id() part of what you're doing in imx_pcie_add_device():
> 
>     __driver_probe_device
>       really_probe
>         pci_dma_configure                       # pci_bus_type.dma_configure
>           of_dma_configure
>             of_dma_configure_id
>               of_iommu_configure
>                 of_pci_iommu_init
>                   of_iommu_configure_dev_id
>                     of_map_id
>                     of_iommu_xlate
>                       ops = iommu_ops_from_fwnode
>                       iommu_fwspec_init
>                       ops->of_xlate(dev, iommu_spec)
> 
> Maybe this needs to be extended somehow with a hook to do the
> device-specific work like updating the LUT?  Just speculating here,
> the IOMMU folks will know how this is expected to work.

Let me do more study. But I think ITS also need stream ID, not sure if
only hook IOMMU can work. Some configuration, IOMMU have not enabled. but
ITS need it.

Ideally, pci system can provide a hook function to host bridge when new
device add and remove.

Frank

> 
> Some typos and minor comments below.
> 
> > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > ---
> >  drivers/pci/controller/dwc/pci-imx6.c | 175 +++++++++++++++++++++++++++++++++-
> >  1 file changed, 174 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> > index 29309ad0e352b..8ecc00049e20b 100644
> > --- a/drivers/pci/controller/dwc/pci-imx6.c
> > +++ b/drivers/pci/controller/dwc/pci-imx6.c
> > @@ -54,6 +54,22 @@
> >  #define IMX95_PE0_GEN_CTRL_3			0x1058
> >  #define IMX95_PCIE_LTSSM_EN			BIT(0)
> >  
> > +#define IMX95_PE0_LUT_ACSCTRL			0x1008
> > +#define IMX95_PEO_LUT_RWA			BIT(16)
> > +#define IMX95_PE0_LUT_ENLOC			GENMASK(4, 0)
> > +
> > +#define IMX95_PE0_LUT_DATA1			0x100c
> > +#define IMX95_PE0_LUT_VLD			BIT(31)
> > +#define IMX95_PE0_LUT_DAC_ID			GENMASK(10, 8)
> > +#define IMX95_PE0_LUT_STREAM_ID			GENMASK(5, 0)
> > +
> > +#define IMX95_PE0_LUT_DATA2			0x1010
> > +#define IMX95_PE0_LUT_REQID			GENMASK(31, 16)
> > +#define IMX95_PE0_LUT_MASK			GENMASK(15, 0)
> > +
> > +#define IMX95_SID_MASK				GENMASK(5, 0)
> > +#define IMX95_MAX_LUT				32
> > +
> >  #define to_imx_pcie(x)	dev_get_drvdata((x)->dev)
> >  
> >  enum imx_pcie_variants {
> > @@ -79,6 +95,7 @@ enum imx_pcie_variants {
> >  #define IMX_PCIE_FLAG_HAS_PHY_RESET		BIT(5)
> >  #define IMX_PCIE_FLAG_HAS_SERDES		BIT(6)
> >  #define IMX_PCIE_FLAG_SUPPORT_64BIT		BIT(7)
> > +#define IMX_PCIE_FLAG_MONITOR_DEV		BIT(8)
> >  
> >  #define imx_check_flag(pci, val)     (pci->drvdata->flags & val)
> >  
> > @@ -132,6 +149,8 @@ struct imx_pcie {
> >  	struct device		*pd_pcie_phy;
> >  	struct phy		*phy;
> >  	const struct imx_pcie_drvdata *drvdata;
> > +
> > +	struct mutex		lock;
> >  };
> >  
> >  /* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
> > @@ -215,6 +234,66 @@ static int imx95_pcie_init_phy(struct imx_pcie *imx_pcie)
> >  	return 0;
> >  }
> >  
> > +static int imx_pcie_config_lut(struct imx_pcie *imx_pcie, u16 reqid, u8 sid)
> > +{
> > +	struct dw_pcie *pci = imx_pcie->pci;
> > +	struct device *dev = pci->dev;
> > +	u32 data1, data2;
> > +	int i;
> > +
> > +	if (sid >= 64) {
> > +		dev_err(dev, "Invalid SID for index %d\n", sid);
> > +		return -EINVAL;
> > +	}
> > +
> > +	guard(mutex)(&imx_pcie->lock);
> > +
> > +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> > +
> > +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1);
> > +		if (data1 & IMX95_PE0_LUT_VLD)
> > +			continue;
> > +
> > +		data1 = FIELD_PREP(IMX95_PE0_LUT_DAC_ID, 0);
> > +		data1 |= FIELD_PREP(IMX95_PE0_LUT_STREAM_ID, sid);
> > +		data1 |= IMX95_PE0_LUT_VLD;
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1);
> > +
> > +		data2 = 0xffff;
> > +		data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, reqid);
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2);
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> > +
> > +		return 0;
> > +	}
> > +
> > +	dev_err(dev, "All lut already used\n");
> > +	return -EINVAL;
> > +}
> > +
> > +static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 reqid)
> > +{
> > +	u32 data2 = 0;
> > +	int i;
> > +
> > +	guard(mutex)(&imx_pcie->lock);
> > +
> > +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> > +
> > +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2);
> > +		if (FIELD_GET(IMX95_PE0_LUT_REQID, data2) == reqid) {
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, 0);
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, 0);
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> > +		}
> > +	}
> > +}
> > +
> >  static void imx_pcie_configure_type(struct imx_pcie *imx_pcie)
> >  {
> >  	const struct imx_pcie_drvdata *drvdata = imx_pcie->drvdata;
> > @@ -1232,6 +1311,85 @@ static int imx_pcie_resume_noirq(struct device *dev)
> >  	return 0;
> >  }
> >  
> > +static bool imx_pcie_match_device(struct pci_bus *bus);
> 
> Can you add the imx_pcie_match_device() earlier in the file so we
> don't need this forward declaration?

imx_pcie_match_device() use global veriable imx_pcie_driver, which close to
end of the file. There will be bigger change if move imx_pcie_driver ahead.

> 
> > +static int imx_pcie_add_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> > +{
> > +	u32 sid_i = 0, sid_m = 0, rid = pci_dev_id(pdev);
> > +	struct device *dev = imx_pcie->pci->dev;
> > +	int err;
> > +
> > +	err = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", NULL, &sid_i);
> > +	if (err)
> > +		return err;
> > +
> > +	err = of_map_id(dev->of_node, rid, "msi-map", "msi-map-mask", NULL, &sid_m);
> > +	if (err)
> > +		return err;
> > +
> > +	if (sid_i != rid && sid_m != rid)
> > +		if ((sid_i & IMX95_SID_MASK) != (sid_m & IMX95_SID_MASK)) {
> > +			dev_err(dev, "its and iommu stream id miss match, please check dts file\n");
> > +			return -EINVAL;
> > +		}
> > +
> > +	/* if iommu-map is not existed then use msi-map's stream id*/
> 
> Capitalize consistently, e.g., the most comments in this file start
> with a capital letter.
> 
> s/is not existed/does not exist/
> 
> Add space before closing */
> 
> > +	if (sid_i == rid)
> > +		sid_i = sid_m;
> > +
> > +	sid_i &= IMX95_SID_MASK;
> > +
> > +	if (sid_i != rid)
> > +		return imx_pcie_config_lut(imx_pcie, rid, sid_i);
> > +
> > +	/* Use dwc built-in MSI controller */
> > +	return 0;
> > +}
> > +
> > +static void imx_pcie_del_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> > +{
> > +	imx_pcie_remove_lut(imx_pcie, pci_dev_id(pdev));
> > +}
> > +
> > +
> > +static int imx_pcie_bus_notifier(struct notifier_block *nb, unsigned long action, void *data)
> > +{
> > +	struct pci_host_bridge *host;
> > +	struct imx_pcie *imx_pcie;
> > +	struct pci_dev *pdev;
> > +	int err;
> > +
> > +	pdev = to_pci_dev(data);
> > +	host = pci_find_host_bridge(pdev->bus);
> > +
> > +	if (!imx_pcie_match_device(host->bus))
> > +		return NOTIFY_OK;
> > +
> > +	imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(host->sysdata));
> > +
> > +	if (!imx_check_flag(imx_pcie, IMX_PCIE_FLAG_MONITOR_DEV))
> > +		return NOTIFY_OK;
> > +
> > +	switch (action) {
> > +	case BUS_NOTIFY_ADD_DEVICE:
> > +		err = imx_pcie_add_device(imx_pcie, pdev);
> > +		if (err)
> > +			return notifier_from_errno(err);
> > +		break;
> > +	case BUS_NOTIFY_DEL_DEVICE:
> > +		imx_pcie_del_device(imx_pcie, pdev);
> > +		break;
> > +	default:
> > +		return NOTIFY_DONE;
> > +	}
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block imx_pcie_nb = {
> > +	.notifier_call = imx_pcie_bus_notifier,
> > +};
> > +
> >  static const struct dev_pm_ops imx_pcie_pm_ops = {
> >  	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_pcie_suspend_noirq,
> >  				  imx_pcie_resume_noirq)
> > @@ -1264,6 +1422,8 @@ static int imx_pcie_probe(struct platform_device *pdev)
> >  	imx_pcie->pci = pci;
> >  	imx_pcie->drvdata = of_device_get_match_data(dev);
> >  
> > +	mutex_init(&imx_pcie->lock);
> > +
> >  	/* Find the PHY if one is defined, only imx7d uses it */
> >  	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
> >  	if (np) {
> > @@ -1551,7 +1711,8 @@ static const struct imx_pcie_drvdata drvdata[] = {
> >  	},
> >  	[IMX95] = {
> >  		.variant = IMX95,
> > -		.flags = IMX_PCIE_FLAG_HAS_SERDES,
> > +		.flags = IMX_PCIE_FLAG_HAS_SERDES |
> > +			 IMX_PCIE_FLAG_MONITOR_DEV,
> >  		.clk_names = imx8mq_clks,
> >  		.clks_cnt = ARRAY_SIZE(imx8mq_clks),
> >  		.ltssm_off = IMX95_PE0_GEN_CTRL_3,
> > @@ -1687,6 +1848,8 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
> >  
> >  static int __init imx_pcie_init(void)
> >  {
> > +	int ret;
> > +
> >  #ifdef CONFIG_ARM
> >  	struct device_node *np;
> >  
> > @@ -1705,7 +1868,17 @@ static int __init imx_pcie_init(void)
> >  	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
> >  			"external abort on non-linefetch");
> >  #endif
> > +	ret = bus_register_notifier(&pci_bus_type, &imx_pcie_nb);
> > +	if (ret)
> > +		return ret;
> 
> I think this should go in imx6_pcie_probe().

The same nb only register once. If move to probe, bus_register_notifier()
will return error and dump error message when second pci controller
instance probe.

> 
> >  	return platform_driver_register(&imx_pcie_driver);
> >  }
> > +
> > +static void __exit imx_pcie_exit(void)
> > +{
> > +	bus_unregister_notifier(&pci_bus_type, &imx_pcie_nb);
> 
> It looks like this driver is removable?

Actually, I think not. I am not sure how to prevent driver remove. There
are raise condition when driver remove and a pci device hot plug at the
same time although hot plug pci have not implement yet in imx platform.

> 
> What happens when an external abort occurs after the
> imx6q_pcie_abort_handler() text is removed?
> 
> > +}
> > +
> >  device_initcall(imx_pcie_init);
> > +__exitcall(imx_pcie_exit);
> > 
> > -- 
> > 2.34.1
> >
Frank Li May 31, 2024, 4:25 p.m. UTC | #5
On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > [+cc IOMMU and pcie-apple.c folks for comment]
> > 
> > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > This involves examining the msi-map and smmu-map to ensure consistent
> > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > controller is utilized as a fallback.
> > > 
> > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > controller. This function configures the correct LUT based on Device Tree
> > > Settings (DTS).
> > 
> > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > have to do this, I wish it were *more* similar, i.e., copy the
> > function names, bitmap tracking, code structure, etc.
> > 
> > I don't really know how stream IDs work, but I assume they are used on
> > most or all arm64 platforms, so I'm a little surprised that of all the
> > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > this notifier.
> 
> This is one of those things that's mostly at the mercy of the PCIe root
> complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> is derived directly from the PCI RID, sometimes with additional high-order
> bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> LUT is a particular feature of the the Synopsys IP - I know there's also one
> on the NXP Layerscape platforms, but on those it's programmed by the
> bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> properties to match. Ideally that's what i.MX should do as well, but hey.

Actually, I think it is not good for uboot config it because PCIe device is
hotplug, such as SD7.0.  SD7.0 card may plug after system boot. uboot miss
config it because uboot scan nothing when it run.

> 
> > There's this path, which is pretty generic and does at least the
> > of_map_id() part of what you're doing in imx_pcie_add_device():
> > 
> >      __driver_probe_device
> >        really_probe
> >          pci_dma_configure                       # pci_bus_type.dma_configure
> >            of_dma_configure
> >              of_dma_configure_id
> >                of_iommu_configure
> >                  of_pci_iommu_init
> >                    of_iommu_configure_dev_id
> >                      of_map_id
> >                      of_iommu_xlate
> >                        ops = iommu_ops_from_fwnode
> >                        iommu_fwspec_init
> >                        ops->of_xlate(dev, iommu_spec)
> > 
> > Maybe this needs to be extended somehow with a hook to do the
> > device-specific work like updating the LUT?  Just speculating here,
> > the IOMMU folks will know how this is expected to work.
> 
> Note that that particular code path has fundamental issues and much of it
> needs to go away (I'm working on it, but it's a rich ~8-year-old pile of
> technical debt...). IOMMU configuration needs to be happening at
> device_add() time via the IOMMU layer's own bus notifier.
> 
> If it's really necessary to do this programming from Linux, then there's
> still no point in it being dynamic - the mappings cannot ever change, since
> the rest of the kernel believes that what the DT said at boot time was
> already a property of the hardware. It would be a lot more logical, and
> likely simpler, for the driver to just read the relevant map property and
> program the entire LUT to match, all in one go at controller probe time.
> Rather like what's already commonly done with the parsing of "dma-ranges" to
> program address-translation LUTs for inbound windows.

Do you means prefer v3's method?
https://lore.kernel.org/imx/20240402-pci2_upstream-v3-8-803414bdb430@nxp.com/

> 
> Plus that would also give a chance of safely dealing with bad DTs specifying
> invalid ID mappings (by refusing to probe at all). As it is, returning an
> error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> further notifiers from running at that point - the device will still be
> added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> without the controller being correctly programmed, which at best won't work
> and at worst may break the whole system.
> 
> Thanks,
> Robin.
Bjorn Helgaas June 3, 2024, 5:11 p.m. UTC | #6
On Fri, May 31, 2024 at 12:14:18PM -0400, Frank Li wrote:
> On Thu, May 30, 2024 at 06:08:32PM -0500, Bjorn Helgaas wrote:
> > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > This involves examining the msi-map and smmu-map to ensure consistent
> > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > controller is utilized as a fallback.
> > > 
> > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > controller. This function configures the correct LUT based on Device Tree
> > > Settings (DTS).
> > 
> > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > have to do this, I wish it were *more* similar, i.e., copy the
> > function names, bitmap tracking, code structure, etc.
> 
> Actually, I refer apple_pcie_bus_notifier(). I can't direct use apple's
> implement because in imx95 have difference PCI host controller, another one
> is PCI ECAM netc controller. At lease function name should be similar with
> apple. 

I know it's different hardware, so obviously it can't be exactly the
same.  These are the differences that looked possibly unnecessary:

  - registering from initcall instead of .probe():

      apple_pcie_probe                  # .probe() method
        bus_register_notifier(&pci_bus_type, &apple_pcie_nb)

      imx_pcie_init                     # device_initcall()
        bus_register_notifier(&pci_bus_type, &imx_pcie_nb)

  - naming BUS_NOTIFY_DEL_DEVICE function:

      apple_pcie_release_device()
      imx_pcie_del_device()

  - tracking entries in use via bitmap vs scanning hardware for
    invalid entries:

      bitmap_find_free_region           # apple

      imx_pcie_config_lut               # imx
        for (i = 0; i < IMX95_MAX_LUT; i++)
          regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1)
          if (data1 & IMX95_PE0_LUT_VLD)
            continue

When we fix a bug in one driver, it's easier to check whether other
drivers also need the fix if they use the same structure and names.

Bjorn
Bjorn Helgaas June 3, 2024, 5:19 p.m. UTC | #7
On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > [+cc IOMMU and pcie-apple.c folks for comment]
> > 
> > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > This involves examining the msi-map and smmu-map to ensure consistent
> > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > controller is utilized as a fallback.
> > > 
> > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > controller. This function configures the correct LUT based on Device Tree
> > > Settings (DTS).
> > 
> > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > have to do this, I wish it were *more* similar, i.e., copy the
> > function names, bitmap tracking, code structure, etc.
> > 
> > I don't really know how stream IDs work, but I assume they are used on
> > most or all arm64 platforms, so I'm a little surprised that of all the
> > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > this notifier.
> 
> This is one of those things that's mostly at the mercy of the PCIe root
> complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> is derived directly from the PCI RID, sometimes with additional high-order
> bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> LUT is a particular feature of the the Synopsys IP - I know there's also one
> on the NXP Layerscape platforms, but on those it's programmed by the
> bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> properties to match. Ideally that's what i.MX should do as well, but hey.

Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
see that the LUT CSR accesses use IMX95_* definitions.

> If it's really necessary to do this programming from Linux, then there's
> still no point in it being dynamic - the mappings cannot ever change, since
> the rest of the kernel believes that what the DT said at boot time was
> already a property of the hardware. It would be a lot more logical, and
> likely simpler, for the driver to just read the relevant map property and
> program the entire LUT to match, all in one go at controller probe time.
> Rather like what's already commonly done with the parsing of "dma-ranges" to
> program address-translation LUTs for inbound windows.
> 
> Plus that would also give a chance of safely dealing with bad DTs specifying
> invalid ID mappings (by refusing to probe at all). As it is, returning an
> error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> further notifiers from running at that point - the device will still be
> added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> without the controller being correctly programmed, which at best won't work
> and at worst may break the whole system.

Frank, could the imx LUT be programmed once at boot-time instead of at
device-add time?  I'm guessing maybe not because apparently there is a
risk of running out of LUT entries?

It sounds like the consequences of running out of LUT entries are
catastrophic, e.g., memory corruption from mis-directed DMA?  If
that's possible, I think we need to figure out how to prevent the
device from being used, not just dev_warn() about it.

Bjorn
Frank Li June 3, 2024, 6:21 p.m. UTC | #8
On Mon, Jun 03, 2024 at 12:11:49PM -0500, Bjorn Helgaas wrote:
> On Fri, May 31, 2024 at 12:14:18PM -0400, Frank Li wrote:
> > On Thu, May 30, 2024 at 06:08:32PM -0500, Bjorn Helgaas wrote:
> > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > controller is utilized as a fallback.
> > > > 
> > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > controller. This function configures the correct LUT based on Device Tree
> > > > Settings (DTS).
> > > 
> > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > have to do this, I wish it were *more* similar, i.e., copy the
> > > function names, bitmap tracking, code structure, etc.
> > 
> > Actually, I refer apple_pcie_bus_notifier(). I can't direct use apple's
> > implement because in imx95 have difference PCI host controller, another one
> > is PCI ECAM netc controller. At lease function name should be similar with
> > apple. 
> 
> I know it's different hardware, so obviously it can't be exactly the
> same.  These are the differences that looked possibly unnecessary:
> 
>   - registering from initcall instead of .probe():
> 
>       apple_pcie_probe                  # .probe() method
>         bus_register_notifier(&pci_bus_type, &apple_pcie_nb)
> 
>       imx_pcie_init                     # device_initcall()
>         bus_register_notifier(&pci_bus_type, &imx_pcie_nb)

Maybe apple only one PCIe controller with multi ports. One nb just register
once.

If there are multi PCIe controller instance, second bus_register_notifier()
report error.

Of course, if you like, I can use global reference counter to handle this.

> 
>   - naming BUS_NOTIFY_DEL_DEVICE function:
> 
>       apple_pcie_release_device()
>       imx_pcie_del_device()

Okay, I can change to release_device().

> 
>   - tracking entries in use via bitmap vs scanning hardware for
>     invalid entries:
> 
>       bitmap_find_free_region           # apple
> 
>       imx_pcie_config_lut               # imx
>         for (i = 0; i < IMX95_MAX_LUT; i++)
>           regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1)
>           if (data1 & IMX95_PE0_LUT_VLD)
>             continue

Okay, I can change to use bitmap.

> 
> When we fix a bug in one driver, it's easier to check whether other
> drivers also need the fix if they use the same structure and names.
> 
> Bjorn
Frank Li June 3, 2024, 6:42 p.m. UTC | #9
On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > 
> > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > controller is utilized as a fallback.
> > > > 
> > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > controller. This function configures the correct LUT based on Device Tree
> > > > Settings (DTS).
> > > 
> > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > have to do this, I wish it were *more* similar, i.e., copy the
> > > function names, bitmap tracking, code structure, etc.
> > > 
> > > I don't really know how stream IDs work, but I assume they are used on
> > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > this notifier.
> > 
> > This is one of those things that's mostly at the mercy of the PCIe root
> > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > is derived directly from the PCI RID, sometimes with additional high-order
> > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > on the NXP Layerscape platforms, but on those it's programmed by the
> > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > properties to match. Ideally that's what i.MX should do as well, but hey.
> 
> Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> see that the LUT CSR accesses use IMX95_* definitions.

Yes, it convert 16bit RID to 6bit stream id.

> 
> > If it's really necessary to do this programming from Linux, then there's
> > still no point in it being dynamic - the mappings cannot ever change, since
> > the rest of the kernel believes that what the DT said at boot time was
> > already a property of the hardware. It would be a lot more logical, and
> > likely simpler, for the driver to just read the relevant map property and
> > program the entire LUT to match, all in one go at controller probe time.
> > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > program address-translation LUTs for inbound windows.
> > 
> > Plus that would also give a chance of safely dealing with bad DTs specifying
> > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > further notifiers from running at that point - the device will still be
> > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > without the controller being correctly programmed, which at best won't work
> > and at worst may break the whole system.
> 
> Frank, could the imx LUT be programmed once at boot-time instead of at
> device-add time?  I'm guessing maybe not because apparently there is a
> risk of running out of LUT entries?

It is not good idea to depend on boot loader so much. Some hot plug devics
(SD7.0) may plug after system boot. Two PCIe instances shared one set
of 6bits stream id (total 64). Assume total 16 assign to two PCIe
controllers. each have 8 stream id. If use uboot assign it static, each
PCIe controller have below 8 devices.  It will be failrue one controller
connect 7, another connect 9. but if dynamtic alloc when devices add, both
controller can work.

Although we have not so much devices now,  this way give us possility to
improve it in future.


> 
> It sounds like the consequences of running out of LUT entries are
> catastrophic, e.g., memory corruption from mis-directed DMA?  If
> that's possible, I think we need to figure out how to prevent the
> device from being used, not just dev_warn() about it.

Yes, but so far, we have not met such problem now. We can improve it when
we really face such problem.

> 
> Bjorn
Bjorn Helgaas June 3, 2024, 6:56 p.m. UTC | #10
On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > 
> > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > controller is utilized as a fallback.
> > > > > 
> > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > Settings (DTS).
> > > > 
> > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > function names, bitmap tracking, code structure, etc.
> > > > 
> > > > I don't really know how stream IDs work, but I assume they are used on
> > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > this notifier.
> > > 
> > > This is one of those things that's mostly at the mercy of the PCIe root
> > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > is derived directly from the PCI RID, sometimes with additional high-order
> > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > 
> > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > see that the LUT CSR accesses use IMX95_* definitions.
> 
> Yes, it convert 16bit RID to 6bit stream id.

IIUC, you're saying this is not a Synopsys feature, it's an i.MX
feature.

> > > If it's really necessary to do this programming from Linux, then there's
> > > still no point in it being dynamic - the mappings cannot ever change, since
> > > the rest of the kernel believes that what the DT said at boot time was
> > > already a property of the hardware. It would be a lot more logical, and
> > > likely simpler, for the driver to just read the relevant map property and
> > > program the entire LUT to match, all in one go at controller probe time.
> > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > program address-translation LUTs for inbound windows.
> > > 
> > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > further notifiers from running at that point - the device will still be
> > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > without the controller being correctly programmed, which at best won't work
> > > and at worst may break the whole system.
> > 
> > Frank, could the imx LUT be programmed once at boot-time instead of at
> > device-add time?  I'm guessing maybe not because apparently there is a
> > risk of running out of LUT entries?
> 
> It is not good idea to depend on boot loader so much.

I meant "could this be programmed once when the Linux imx host
controller driver is probed?"  But from the below, it sounds like
that's not possible in general because you don't have enough stream
IDs to do that.

> Some hot plug devics
> (SD7.0) may plug after system boot. Two PCIe instances shared one set
> of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> controllers. each have 8 stream id. If use uboot assign it static, each
> PCIe controller have below 8 devices.  It will be failrue one controller
> connect 7, another connect 9. but if dynamtic alloc when devices add, both
> controller can work.
> 
> Although we have not so much devices now,  this way give us possility to
> improve it in future.
> 
> > It sounds like the consequences of running out of LUT entries are
> > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > that's possible, I think we need to figure out how to prevent the
> > device from being used, not just dev_warn() about it.
> 
> Yes, but so far, we have not met such problem now. We can improve it when
> we really face such problem.

If this controller can only support DMA from a limited number of
endpoints below it, I think we should figure out how to enforce that
directly.  Maybe we can prevent drivers from enabling bus mastering or
something.  I'm not happy with the idea of waiting for and debugging a
report of data corruption.

Bjorn
Frank Li June 3, 2024, 8:07 p.m. UTC | #11
On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > 
> > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > controller is utilized as a fallback.
> > > > > > 
> > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > Settings (DTS).
> > > > > 
> > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > function names, bitmap tracking, code structure, etc.
> > > > > 
> > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > this notifier.
> > > > 
> > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > 
> > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > see that the LUT CSR accesses use IMX95_* definitions.
> > 
> > Yes, it convert 16bit RID to 6bit stream id.
> 
> IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> feature.

Yes, it is i.MX feature. But I think other vendor should have similar
situation if use old arm smmu.

> 
> > > > If it's really necessary to do this programming from Linux, then there's
> > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > the rest of the kernel believes that what the DT said at boot time was
> > > > already a property of the hardware. It would be a lot more logical, and
> > > > likely simpler, for the driver to just read the relevant map property and
> > > > program the entire LUT to match, all in one go at controller probe time.
> > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > program address-translation LUTs for inbound windows.
> > > > 
> > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > further notifiers from running at that point - the device will still be
> > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > without the controller being correctly programmed, which at best won't work
> > > > and at worst may break the whole system.
> > > 
> > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > device-add time?  I'm guessing maybe not because apparently there is a
> > > risk of running out of LUT entries?
> > 
> > It is not good idea to depend on boot loader so much.
> 
> I meant "could this be programmed once when the Linux imx host
> controller driver is probed?"  But from the below, it sounds like
> that's not possible in general because you don't have enough stream
> IDs to do that.

Oh! sorry miss understand what your means. It is possible like what I did
at v3 version. But I think it is not good enough. 

> 
> > Some hot plug devics
> > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > controllers. each have 8 stream id. If use uboot assign it static, each
> > PCIe controller have below 8 devices.  It will be failrue one controller
> > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > controller can work.
> > 
> > Although we have not so much devices now,  this way give us possility to
> > improve it in future.
> > 
> > > It sounds like the consequences of running out of LUT entries are
> > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > that's possible, I think we need to figure out how to prevent the
> > > device from being used, not just dev_warn() about it.
> > 
> > Yes, but so far, we have not met such problem now. We can improve it when
> > we really face such problem.
> 
> If this controller can only support DMA from a limited number of
> endpoints below it, I think we should figure out how to enforce that
> directly.  Maybe we can prevent drivers from enabling bus mastering or
> something.  I'm not happy with the idea of waiting for and debugging a
> report of data corruption.

It may add a pre-add hook function to pci bridge. let me do more research.

Frank

> 
> Bjorn
Robin Murphy June 3, 2024, 8:20 p.m. UTC | #12
On 2024-06-03 6:19 pm, Bjorn Helgaas wrote:
> On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
>> On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
>>> [+cc IOMMU and pcie-apple.c folks for comment]
>>>
>>> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
>>>> For the i.MX95, configuration of a LUT is necessary to convert Bus Device
>>>> Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
>>>> This involves examining the msi-map and smmu-map to ensure consistent
>>>> mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
>>>> registers are configured. In the absence of an msi-map, the built-in MSI
>>>> controller is utilized as a fallback.
>>>>
>>>> Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
>>>> upon the appearance of a new PCI device and when the bus is an iMX6 PCI
>>>> controller. This function configures the correct LUT based on Device Tree
>>>> Settings (DTS).
>>>
>>> This scheme is pretty similar to apple_pcie_bus_notifier().  If we
>>> have to do this, I wish it were *more* similar, i.e., copy the
>>> function names, bitmap tracking, code structure, etc.
>>>
>>> I don't really know how stream IDs work, but I assume they are used on
>>> most or all arm64 platforms, so I'm a little surprised that of all the
>>> PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
>>> this notifier.
>>
>> This is one of those things that's mostly at the mercy of the PCIe root
>> complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
>> is derived directly from the PCI RID, sometimes with additional high-order
>> bits hard-wired to disambiguate PCI segments. I believe this RID-translation
>> LUT is a particular feature of the the Synopsys IP - I know there's also one
>> on the NXP Layerscape platforms, but on those it's programmed by the
>> bootloader, which also generates the appropriate "msi-map" and "iommu-map"
>> properties to match. Ideally that's what i.MX should do as well, but hey.
> 
> Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> see that the LUT CSR accesses use IMX95_* definitions.

Well, it's not unreasonable to call things "IMX95" in this context if 
they are only relevant to the configuration used by i.MX95, and not to 
the other i.MX SoCs which this driver also supports. However the data 
register fields certainly look suspiciously similar to those used on 
Layerscape[1], although I guess that still doesn't rule out it being 
NXP's own widget either. Anyway, the exact details aren't really 
significant, the point was really just to say don't expect this to 
generalise much beyond what you've seen already, and that there's 
precedent for bootloaders doing this for us.

>> If it's really necessary to do this programming from Linux, then there's
>> still no point in it being dynamic - the mappings cannot ever change, since
>> the rest of the kernel believes that what the DT said at boot time was
>> already a property of the hardware. It would be a lot more logical, and
>> likely simpler, for the driver to just read the relevant map property and
>> program the entire LUT to match, all in one go at controller probe time.
>> Rather like what's already commonly done with the parsing of "dma-ranges" to
>> program address-translation LUTs for inbound windows.
>>
>> Plus that would also give a chance of safely dealing with bad DTs specifying
>> invalid ID mappings (by refusing to probe at all). As it is, returning an
>> error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
>> further notifiers from running at that point - the device will still be
>> added, allowed to bind a driver, and able to start sending DMA/MSI traffic
>> without the controller being correctly programmed, which at best won't work
>> and at worst may break the whole system.
> 
> Frank, could the imx LUT be programmed once at boot-time instead of at
> device-add time?  I'm guessing maybe not because apparently there is a
> risk of running out of LUT entries?

The risk still exists just as much either way - if we have a bogus DT 
and/or just more PCI RIDs present than we can handle, we're going to 
have a bad time. There's no advantage to only finding that out once we 
try to add the 33rd device and it's too late to even do anything about it.

In fact if anything, this notifier approach exacerbates that risk the 
most by consuming one LUT entry per PCI RID regardless of whether an 
"iommu-map-mask" is involved. Assuming the IMX95_PE0_LUT_MASK field is 
the same as its Layerscape counterpart, we could support >32 RIDs if the 
map and mask are constructed to squash multiple RIDs onto each StreamID 
(the SMMU driver supports this), and we have the up-front information to 
easily configure hardware masking in the LUT itself. It's not 
necessarily possible to reconstruct such mappings from only seeing 
individual input and output values one-by-one.

Thanks,
Robin.

[1] 
https://source.denx.de/u-boot/u-boot/-/blob/master/drivers/pci/pcie_layerscape_fixup.c?ref_type=heads#L83

> It sounds like the consequences of running out of LUT entries are
> catastrophic, e.g., memory corruption from mis-directed DMA?  If
> that's possible, I think we need to figure out how to prevent the
> device from being used, not just dev_warn() about it.
> 
> Bjorn
Laurentiu Tudor June 3, 2024, 8:29 p.m. UTC | #13
On 5/31/24 17:58, Robin Murphy wrote:
> On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
>> [+cc IOMMU and pcie-apple.c folks for comment]
>>
>> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
>>> For the i.MX95, configuration of a LUT is necessary to convert Bus 
>>> Device
>>> Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
>>> This involves examining the msi-map and smmu-map to ensure consistent
>>> mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
>>> registers are configured. In the absence of an msi-map, the built-in MSI
>>> controller is utilized as a fallback.
>>>
>>> Additionally, register a PCI bus notifier to trigger 
>>> imx_pcie_add_device()
>>> upon the appearance of a new PCI device and when the bus is an iMX6 PCI
>>> controller. This function configures the correct LUT based on Device 
>>> Tree
>>> Settings (DTS).
>>
>> This scheme is pretty similar to apple_pcie_bus_notifier().  If we
>> have to do this, I wish it were *more* similar, i.e., copy the
>> function names, bitmap tracking, code structure, etc.
>>
>> I don't really know how stream IDs work, but I assume they are used on
>> most or all arm64 platforms, so I'm a little surprised that of all the
>> PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
>> this notifier.
> 
> This is one of those things that's mostly at the mercy of the PCIe root 
> complex implementation. Typically the SMMU StreamID and/or GIC ITS 
> DeviceID is derived directly from the PCI RID, sometimes with additional 
> high-order bits hard-wired to disambiguate PCI segments. I believe this 
> RID-translation LUT is a particular feature of the the Synopsys IP - I 
> know there's also one on the NXP Layerscape platforms, but on those it's 
> programmed by the bootloader, which also generates the appropriate 
> "msi-map" and "iommu-map" properties to match. Ideally that's what i.MX 
> should do as well, but hey.

That's usually fine, except when SRIOV and/or hotplug devices (that is, 
not discoverable at bootloader time) come into play. We came up with 
this "solution" to cover these more dynamic scenarios.

https://source.denx.de/u-boot/u-boot/-/commit/2a5bbb13cc39102a68fcc31056925427ab44b591

---
Best Regards, Laurentiu

>> There's this path, which is pretty generic and does at least the
>> of_map_id() part of what you're doing in imx_pcie_add_device():
>>
>>      __driver_probe_device
>>        really_probe
>>          pci_dma_configure                       # 
>> pci_bus_type.dma_configure
>>            of_dma_configure
>>              of_dma_configure_id
>>                of_iommu_configure
>>                  of_pci_iommu_init
>>                    of_iommu_configure_dev_id
>>                      of_map_id
>>                      of_iommu_xlate
>>                        ops = iommu_ops_from_fwnode
>>                        iommu_fwspec_init
>>                        ops->of_xlate(dev, iommu_spec)
>>
>> Maybe this needs to be extended somehow with a hook to do the
>> device-specific work like updating the LUT?  Just speculating here,
>> the IOMMU folks will know how this is expected to work.
> 
> Note that that particular code path has fundamental issues and much of 
> it needs to go away (I'm working on it, but it's a rich ~8-year-old pile 
> of technical debt...). IOMMU configuration needs to be happening at 
> device_add() time via the IOMMU layer's own bus notifier.
> 
> If it's really necessary to do this programming from Linux, then there's 
> still no point in it being dynamic - the mappings cannot ever change, 
> since the rest of the kernel believes that what the DT said at boot time 
> was already a property of the hardware. It would be a lot more logical, 
> and likely simpler, for the driver to just read the relevant map 
> property and program the entire LUT to match, all in one go at 
> controller probe time. Rather like what's already commonly done with the 
> parsing of "dma-ranges" to program address-translation LUTs for inbound 
> windows.
> 
> Plus that would also give a chance of safely dealing with bad DTs 
> specifying invalid ID mappings (by refusing to probe at all). As it is, 
> returning an error from a child's BUS_NOTIFY_ADD_DEVICE does nothing 
> except prevent any further notifiers from running at that point - the 
> device will still be added, allowed to bind a driver, and able to start 
> sending DMA/MSI traffic without the controller being correctly 
> programmed, which at best won't work and at worst may break the whole 
> system.
> 
> Thanks,
> Robin.
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Frank Li June 3, 2024, 9:04 p.m. UTC | #14
On Mon, Jun 03, 2024 at 09:20:03PM +0100, Robin Murphy wrote:
> On 2024-06-03 6:19 pm, Bjorn Helgaas wrote:
> > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > 
> > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > controller is utilized as a fallback.
> > > > > 
> > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > Settings (DTS).
> > > > 
> > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > function names, bitmap tracking, code structure, etc.
> > > > 
> > > > I don't really know how stream IDs work, but I assume they are used on
> > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > this notifier.
> > > 
> > > This is one of those things that's mostly at the mercy of the PCIe root
> > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > is derived directly from the PCI RID, sometimes with additional high-order
> > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > 
> > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > see that the LUT CSR accesses use IMX95_* definitions.
> 
> Well, it's not unreasonable to call things "IMX95" in this context if they
> are only relevant to the configuration used by i.MX95, and not to the other
> i.MX SoCs which this driver also supports. However the data register fields
> certainly look suspiciously similar to those used on Layerscape[1], although
> I guess that still doesn't rule out it being NXP's own widget either.
> Anyway, the exact details aren't really significant, the point was really
> just to say don't expect this to generalise much beyond what you've seen
> already, and that there's precedent for bootloaders doing this for us.

It is re-used layerscape design at this point. I don't know the history,
why choose use bootloader to config it, supposed it should be done at
kernel. We found some problem by using bootloader to config LUT. Most
layserscape system PCI devices are fixed during boot.

During I debug layerscape PCI, I was trapped by uboot many times. It is
quite anoise, especially using difference version uboot.

> 
> > > If it's really necessary to do this programming from Linux, then there's
> > > still no point in it being dynamic - the mappings cannot ever change, since
> > > the rest of the kernel believes that what the DT said at boot time was
> > > already a property of the hardware. It would be a lot more logical, and
> > > likely simpler, for the driver to just read the relevant map property and
> > > program the entire LUT to match, all in one go at controller probe time.
> > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > program address-translation LUTs for inbound windows.
> > > 
> > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > further notifiers from running at that point - the device will still be
> > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > without the controller being correctly programmed, which at best won't work
> > > and at worst may break the whole system.
> > 
> > Frank, could the imx LUT be programmed once at boot-time instead of at
> > device-add time?  I'm guessing maybe not because apparently there is a
> > risk of running out of LUT entries?
> 
> The risk still exists just as much either way - if we have a bogus DT and/or
> just more PCI RIDs present than we can handle, we're going to have a bad
> time. There's no advantage to only finding that out once we try to add the
> 33rd device and it's too late to even do anything about it.
> 
> In fact if anything, this notifier approach exacerbates that risk the most
> by consuming one LUT entry per PCI RID regardless of whether an
> "iommu-map-mask" is involved. Assuming the IMX95_PE0_LUT_MASK field is the
> same as its Layerscape counterpart, we could support >32 RIDs if the map and
> mask are constructed to squash multiple RIDs onto each StreamID (the SMMU
> driver supports this), and we have the up-front information to easily
> configure hardware masking in the LUT itself. It's not necessarily possible
> to reconstruct such mappings from only seeing individual input and output
> values one-by-one.

iommu may share one stream id for multi-devices. but ITS MSI can't. each
device's MSI index start from 0. It needs difference stream id for each
device.

> 
> Thanks,
> Robin.
> 
> [1] https://source.denx.de/u-boot/u-boot/-/blob/master/drivers/pci/pcie_layerscape_fixup.c?ref_type=heads#L83

Thanks, but It can't resolve device hot-plug problem.

> 
> > It sounds like the consequences of running out of LUT entries are
> > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > that's possible, I think we need to figure out how to prevent the
> > device from being used, not just dev_warn() about it.
> > 
> > Bjorn
Frank Li June 3, 2024, 9:16 p.m. UTC | #15
On Mon, Jun 03, 2024 at 11:29:38PM +0300, Laurentiu Tudor wrote:
> 
> 
> On 5/31/24 17:58, Robin Murphy wrote:
> > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > 
> > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > For the i.MX95, configuration of a LUT is necessary to convert
> > > > Bus Device
> > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > controller is utilized as a fallback.
> > > > 
> > > > Additionally, register a PCI bus notifier to trigger
> > > > imx_pcie_add_device()
> > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > controller. This function configures the correct LUT based on
> > > > Device Tree
> > > > Settings (DTS).
> > > 
> > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > have to do this, I wish it were *more* similar, i.e., copy the
> > > function names, bitmap tracking, code structure, etc.
> > > 
> > > I don't really know how stream IDs work, but I assume they are used on
> > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > this notifier.
> > 
> > This is one of those things that's mostly at the mercy of the PCIe root
> > complex implementation. Typically the SMMU StreamID and/or GIC ITS
> > DeviceID is derived directly from the PCI RID, sometimes with additional
> > high-order bits hard-wired to disambiguate PCI segments. I believe this
> > RID-translation LUT is a particular feature of the the Synopsys IP - I
> > know there's also one on the NXP Layerscape platforms, but on those it's
> > programmed by the bootloader, which also generates the appropriate
> > "msi-map" and "iommu-map" properties to match. Ideally that's what i.MX
> > should do as well, but hey.
> 
> That's usually fine, except when SRIOV and/or hotplug devices (that is, not
> discoverable at bootloader time) come into play. We came up with this
> "solution" to cover these more dynamic scenarios.
> 
> https://source.denx.de/u-boot/u-boot/-/commit/2a5bbb13cc39102a68fcc31056925427ab44b591

If my understand is correct, basic it was still pre-allocate method. Just
reserve more streams id for SRIOV and hotplugs. It was not really depend
on what devices in system.

Frank

> 
> ---
> Best Regards, Laurentiu
> 
> > > There's this path, which is pretty generic and does at least the
> > > of_map_id() part of what you're doing in imx_pcie_add_device():
> > > 
> > >      __driver_probe_device
> > >        really_probe
> > >          pci_dma_configure                       #
> > > pci_bus_type.dma_configure
> > >            of_dma_configure
> > >              of_dma_configure_id
> > >                of_iommu_configure
> > >                  of_pci_iommu_init
> > >                    of_iommu_configure_dev_id
> > >                      of_map_id
> > >                      of_iommu_xlate
> > >                        ops = iommu_ops_from_fwnode
> > >                        iommu_fwspec_init
> > >                        ops->of_xlate(dev, iommu_spec)
> > > 
> > > Maybe this needs to be extended somehow with a hook to do the
> > > device-specific work like updating the LUT?  Just speculating here,
> > > the IOMMU folks will know how this is expected to work.
> > 
> > Note that that particular code path has fundamental issues and much of
> > it needs to go away (I'm working on it, but it's a rich ~8-year-old pile
> > of technical debt...). IOMMU configuration needs to be happening at
> > device_add() time via the IOMMU layer's own bus notifier.
> > 
> > If it's really necessary to do this programming from Linux, then there's
> > still no point in it being dynamic - the mappings cannot ever change,
> > since the rest of the kernel believes that what the DT said at boot time
> > was already a property of the hardware. It would be a lot more logical,
> > and likely simpler, for the driver to just read the relevant map
> > property and program the entire LUT to match, all in one go at
> > controller probe time. Rather like what's already commonly done with the
> > parsing of "dma-ranges" to program address-translation LUTs for inbound
> > windows.
> > 
> > Plus that would also give a chance of safely dealing with bad DTs
> > specifying invalid ID mappings (by refusing to probe at all). As it is,
> > returning an error from a child's BUS_NOTIFY_ADD_DEVICE does nothing
> > except prevent any further notifiers from running at that point - the
> > device will still be added, allowed to bind a driver, and able to start
> > sending DMA/MSI traffic without the controller being correctly
> > programmed, which at best won't work and at worst may break the whole
> > system.
> > 
> > Thanks,
> > Robin.
> > 
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Marc Zyngier June 4, 2024, 3:25 p.m. UTC | #16
On Mon, 03 Jun 2024 22:04:23 +0100,
Frank Li <Frank.li@nxp.com> wrote:
> 
> iommu may share one stream id for multi-devices. but ITS MSI can't. each
> device's MSI index start from 0. It needs difference stream id for each
> device.

That's not quite true. We go through all sort of hoops to find about
device aliasing on PCI and allow devices that translate into the same
DID to get MSIs.

Of course, just like the IOMMU, you lose any form of isolation, but
you get what you pay for.

	M.
Frank Li June 4, 2024, 3:47 p.m. UTC | #17
On Tue, Jun 04, 2024 at 04:25:51PM +0100, Marc Zyngier wrote:
> On Mon, 03 Jun 2024 22:04:23 +0100,
> Frank Li <Frank.li@nxp.com> wrote:
> > 
> > iommu may share one stream id for multi-devices. but ITS MSI can't. each
> > device's MSI index start from 0. It needs difference stream id for each
> > device.
> 
> That's not quite true. We go through all sort of hoops to find about
> device aliasing on PCI and allow devices that translate into the same
> DID to get MSIs.

Could you please point me the code location? I remember I met error when I
try to enable MSI for EP support one year ago.

https://lore.kernel.org/imx/20221124055036.1630573-1-Frank.Li@nxp.com/

I remember that it is failure try to allocate more irqs with the same DID. 

Frank

> 
> Of course, just like the IOMMU, you lose any form of isolation, but
> you get what you pay for.

Anyways, if there are one quirk in PCI system, bridge driver can detect
PCI devices add/remove. it will be better if assign stream ID as need,
compared with static allocate.

> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.
Frank Li June 6, 2024, 8:24 p.m. UTC | #18
On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > 
> > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > controller is utilized as a fallback.
> > > > > > > 
> > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > Settings (DTS).
> > > > > > 
> > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > 
> > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > this notifier.
> > > > > 
> > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > 
> > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > 
> > > Yes, it convert 16bit RID to 6bit stream id.
> > 
> > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > feature.
> 
> Yes, it is i.MX feature. But I think other vendor should have similar
> situation if use old arm smmu.
> 
> > 
> > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > program address-translation LUTs for inbound windows.
> > > > > 
> > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > further notifiers from running at that point - the device will still be
> > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > without the controller being correctly programmed, which at best won't work
> > > > > and at worst may break the whole system.
> > > > 
> > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > risk of running out of LUT entries?
> > > 
> > > It is not good idea to depend on boot loader so much.
> > 
> > I meant "could this be programmed once when the Linux imx host
> > controller driver is probed?"  But from the below, it sounds like
> > that's not possible in general because you don't have enough stream
> > IDs to do that.
> 
> Oh! sorry miss understand what your means. It is possible like what I did
> at v3 version. But I think it is not good enough. 
> 
> > 
> > > Some hot plug devics
> > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > controller can work.
> > > 
> > > Although we have not so much devices now,  this way give us possility to
> > > improve it in future.
> > > 
> > > > It sounds like the consequences of running out of LUT entries are
> > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > that's possible, I think we need to figure out how to prevent the
> > > > device from being used, not just dev_warn() about it.
> > > 
> > > Yes, but so far, we have not met such problem now. We can improve it when
> > > we really face such problem.
> > 
> > If this controller can only support DMA from a limited number of
> > endpoints below it, I think we should figure out how to enforce that
> > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > something.  I'm not happy with the idea of waiting for and debugging a
> > report of data corruption.
> 
> It may add a pre-add hook function to pci bridge. let me do more research.

Hi Bjorn:

int pci_setup_device(struct pci_dev *dev)
{
	dev->error_state = pci_channel_io_normal;
	...
	pci_fixup_device(pci_fixup_early, dev);

	^^^ I can add fixup hook for pci_fixup_early. If not resource, 
I can set dev->error_state to pci_channel_io_frozen or
pci_channel_io_perm_failure
	
	And add below check here after call hook function.

	if (dev->error_state != pci_channel_io_normal)
		return -EIO;
		
}

How do you think this method? If you agree, I can continue search device
remove hook up.

Frank

> 
> Frank
> 
> > 
> > Bjorn
Bjorn Helgaas June 13, 2024, 10:41 p.m. UTC | #19
On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > 
> > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > controller is utilized as a fallback.
> > > > > > > > 
> > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > Settings (DTS).
> > > > > > > 
> > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > 
> > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > this notifier.
> > > > > > 
> > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > 
> > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > 
> > > > Yes, it convert 16bit RID to 6bit stream id.
> > > 
> > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > feature.
> > 
> > Yes, it is i.MX feature. But I think other vendor should have similar
> > situation if use old arm smmu.
> > 
> > > 
> > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > program address-translation LUTs for inbound windows.
> > > > > > 
> > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > further notifiers from running at that point - the device will still be
> > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > and at worst may break the whole system.
> > > > > 
> > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > risk of running out of LUT entries?
> > > > 
> > > > It is not good idea to depend on boot loader so much.
> > > 
> > > I meant "could this be programmed once when the Linux imx host
> > > controller driver is probed?"  But from the below, it sounds like
> > > that's not possible in general because you don't have enough stream
> > > IDs to do that.
> > 
> > Oh! sorry miss understand what your means. It is possible like what I did
> > at v3 version. But I think it is not good enough. 
> > 
> > > 
> > > > Some hot plug devics
> > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > controller can work.
> > > > 
> > > > Although we have not so much devices now,  this way give us possility to
> > > > improve it in future.
> > > > 
> > > > > It sounds like the consequences of running out of LUT entries are
> > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > that's possible, I think we need to figure out how to prevent the
> > > > > device from being used, not just dev_warn() about it.
> > > > 
> > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > we really face such problem.
> > > 
> > > If this controller can only support DMA from a limited number of
> > > endpoints below it, I think we should figure out how to enforce that
> > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > something.  I'm not happy with the idea of waiting for and debugging a
> > > report of data corruption.
> > 
> > It may add a pre-add hook function to pci bridge. let me do more research.
> 
> Hi Bjorn:
> 
> int pci_setup_device(struct pci_dev *dev)
> {
> 	dev->error_state = pci_channel_io_normal;
> 	...
> 	pci_fixup_device(pci_fixup_early, dev);
> 
> 	^^^ I can add fixup hook for pci_fixup_early. If not resource, 
> I can set dev->error_state to pci_channel_io_frozen or
> pci_channel_io_perm_failure
> 	
> 	And add below check here after call hook function.
> 
> 	if (dev->error_state != pci_channel_io_normal)
> 		return -EIO;
> 		
> }
> 
> How do you think this method? If you agree, I can continue search device
> remove hook up.

I think this would mean the device would not appear to be enumerated
at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
use even a pure programmed IO driver with no DMA or MSI?

I wonder if we should have a function pointer in struct
pci_host_bridge, kind of like the existing ->map_irq(), where we could
do host bridge-specific setup when enumerating a PCI device.

We'd still have to solve the issue of preventing DMA, but a hook like
that might avoid the need for a quirk or the bus notifier approach.

Bjorn
Frank Li June 17, 2024, 2:26 p.m. UTC | #20
On Thu, Jun 13, 2024 at 05:41:25PM -0500, Bjorn Helgaas wrote:
> On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> > On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > > 
> > > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > > controller is utilized as a fallback.
> > > > > > > > > 
> > > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > > Settings (DTS).
> > > > > > > > 
> > > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > > 
> > > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > > this notifier.
> > > > > > > 
> > > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > > 
> > > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > > 
> > > > > Yes, it convert 16bit RID to 6bit stream id.
> > > > 
> > > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > > feature.
> > > 
> > > Yes, it is i.MX feature. But I think other vendor should have similar
> > > situation if use old arm smmu.
> > > 
> > > > 
> > > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > > program address-translation LUTs for inbound windows.
> > > > > > > 
> > > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > > further notifiers from running at that point - the device will still be
> > > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > > and at worst may break the whole system.
> > > > > > 
> > > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > > risk of running out of LUT entries?
> > > > > 
> > > > > It is not good idea to depend on boot loader so much.
> > > > 
> > > > I meant "could this be programmed once when the Linux imx host
> > > > controller driver is probed?"  But from the below, it sounds like
> > > > that's not possible in general because you don't have enough stream
> > > > IDs to do that.
> > > 
> > > Oh! sorry miss understand what your means. It is possible like what I did
> > > at v3 version. But I think it is not good enough. 
> > > 
> > > > 
> > > > > Some hot plug devics
> > > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > > controller can work.
> > > > > 
> > > > > Although we have not so much devices now,  this way give us possility to
> > > > > improve it in future.
> > > > > 
> > > > > > It sounds like the consequences of running out of LUT entries are
> > > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > > that's possible, I think we need to figure out how to prevent the
> > > > > > device from being used, not just dev_warn() about it.
> > > > > 
> > > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > > we really face such problem.
> > > > 
> > > > If this controller can only support DMA from a limited number of
> > > > endpoints below it, I think we should figure out how to enforce that
> > > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > > something.  I'm not happy with the idea of waiting for and debugging a
> > > > report of data corruption.
> > > 
> > > It may add a pre-add hook function to pci bridge. let me do more research.
> > 
> > Hi Bjorn:
> > 
> > int pci_setup_device(struct pci_dev *dev)
> > {
> > 	dev->error_state = pci_channel_io_normal;
> > 	...
> > 	pci_fixup_device(pci_fixup_early, dev);
> > 
> > 	^^^ I can add fixup hook for pci_fixup_early. If not resource, 
> > I can set dev->error_state to pci_channel_io_frozen or
> > pci_channel_io_perm_failure
> > 	
> > 	And add below check here after call hook function.
> > 
> > 	if (dev->error_state != pci_channel_io_normal)
> > 		return -EIO;
> > 		
> > }
> > 
> > How do you think this method? If you agree, I can continue search device
> > remove hook up.
> 
> I think this would mean the device would not appear to be enumerated
> at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
> use even a pure programmed IO driver with no DMA or MSI?

Make sense. Let me do more research on this.

Frank
> 
> I wonder if we should have a function pointer in struct
> pci_host_bridge, kind of like the existing ->map_irq(), where we could
> do host bridge-specific setup when enumerating a PCI device.
> 
> We'd still have to solve the issue of preventing DMA, but a hook like
> that might avoid the need for a quirk or the bus notifier approach.
> 
> Bjorn
Frank Li June 21, 2024, 10:29 p.m. UTC | #21
On Mon, Jun 17, 2024 at 10:26:36AM -0400, Frank Li wrote:
> On Thu, Jun 13, 2024 at 05:41:25PM -0500, Bjorn Helgaas wrote:
> > On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> > > On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > > > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > > >
> > > > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > > > controller is utilized as a fallback.
> > > > > > > > > >
> > > > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > > > Settings (DTS).
> > > > > > > > >
> > > > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > > >
> > > > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > > > this notifier.
> > > > > > > >
> > > > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > > >
> > > > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > > >
> > > > > > Yes, it convert 16bit RID to 6bit stream id.
> > > > >
> > > > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > > > feature.
> > > >
> > > > Yes, it is i.MX feature. But I think other vendor should have similar
> > > > situation if use old arm smmu.
> > > >
> > > > >
> > > > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > > > program address-translation LUTs for inbound windows.
> > > > > > > >
> > > > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > > > further notifiers from running at that point - the device will still be
> > > > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > > > and at worst may break the whole system.
> > > > > > >
> > > > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > > > risk of running out of LUT entries?
> > > > > >
> > > > > > It is not good idea to depend on boot loader so much.
> > > > >
> > > > > I meant "could this be programmed once when the Linux imx host
> > > > > controller driver is probed?"  But from the below, it sounds like
> > > > > that's not possible in general because you don't have enough stream
> > > > > IDs to do that.
> > > >
> > > > Oh! sorry miss understand what your means. It is possible like what I did
> > > > at v3 version. But I think it is not good enough.
> > > >
> > > > >
> > > > > > Some hot plug devics
> > > > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > > > controller can work.
> > > > > >
> > > > > > Although we have not so much devices now,  this way give us possility to
> > > > > > improve it in future.
> > > > > >
> > > > > > > It sounds like the consequences of running out of LUT entries are
> > > > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > > > that's possible, I think we need to figure out how to prevent the
> > > > > > > device from being used, not just dev_warn() about it.
> > > > > >
> > > > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > > > we really face such problem.
> > > > >
> > > > > If this controller can only support DMA from a limited number of
> > > > > endpoints below it, I think we should figure out how to enforce that
> > > > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > > > something.  I'm not happy with the idea of waiting for and debugging a
> > > > > report of data corruption.
> > > >
> > > > It may add a pre-add hook function to pci bridge. let me do more research.
> > >
> > > Hi Bjorn:
> > >
> > > int pci_setup_device(struct pci_dev *dev)
> > > {
> > > 	dev->error_state = pci_channel_io_normal;
> > > 	...
> > > 	pci_fixup_device(pci_fixup_early, dev);
> > >
> > > 	^^^ I can add fixup hook for pci_fixup_early. If not resource,
> > > I can set dev->error_state to pci_channel_io_frozen or
> > > pci_channel_io_perm_failure
> > >
> > > 	And add below check here after call hook function.
> > >
> > > 	if (dev->error_state != pci_channel_io_normal)
> > > 		return -EIO;
> > >
> > > }
> > >
> > > How do you think this method? If you agree, I can continue search device
> > > remove hook up.
> >
> > I think this would mean the device would not appear to be enumerated
> > at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
> > use even a pure programmed IO driver with no DMA or MSI?
>
> Make sense. Let me do more research on this.
>
> Frank
> >
> > I wonder if we should have a function pointer in struct
> > pci_host_bridge, kind of like the existing ->map_irq(), where we could
> > do host bridge-specific setup when enumerating a PCI device.

Consider some device may no use MSI or DMA. It'd better set LUT when
allocate msi irq. I think insert a irq-domain in irq hierarchy.

static const struct irq_domain_ops lut_pcie_msi_domain_ops = {
        .alloc  = lut_pcie_irq_domain_alloc,
        .free   = lut_pcie_irq_domain_free,
};

int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
{
        struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);

        pp->irq_domain = irq_domain_create_hierarchy(...)

        pp->msi_domain = pci_msi_create_irq_domain(...);

        return 0;
}

Manage lut stream id in lut_pcie_irq_domain_alloc() and
lut_pcie_irq_domain_free().

So failure happen only when driver use MSI and no-stream ID avaiable. It
should be better than failure when add devices. Some devices may not use
at all.

How do you think this method? If it is okay, I can try research how to
set LUT when do dma_mapping(unmap). 

Frank Li
> >
> > We'd still have to solve the issue of preventing DMA, but a hook like
> > that might avoid the need for a quirk or the bus notifier approach.
> >
> > Bjorn
Bjorn Helgaas June 21, 2024, 10:43 p.m. UTC | #22
On Fri, Jun 21, 2024 at 06:29:48PM -0400, Frank Li wrote:
> On Mon, Jun 17, 2024 at 10:26:36AM -0400, Frank Li wrote:
> > On Thu, Jun 13, 2024 at 05:41:25PM -0500, Bjorn Helgaas wrote:
> > > On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> > > > On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > > > > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > > > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > > > >
> > > > > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > > > > controller is utilized as a fallback.
> > > > > > > > > > >
> > > > > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > > > > Settings (DTS).
> > > > > > > > > >
> > > > > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > > > >
> > > > > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > > > > this notifier.
> > > > > > > > >
> > > > > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > > > >
> > > > > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > > > >
> > > > > > > Yes, it convert 16bit RID to 6bit stream id.
> > > > > >
> > > > > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > > > > feature.
> > > > >
> > > > > Yes, it is i.MX feature. But I think other vendor should have similar
> > > > > situation if use old arm smmu.
> > > > >
> > > > > >
> > > > > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > > > > program address-translation LUTs for inbound windows.
> > > > > > > > >
> > > > > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > > > > further notifiers from running at that point - the device will still be
> > > > > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > > > > and at worst may break the whole system.
> > > > > > > >
> > > > > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > > > > risk of running out of LUT entries?
> > > > > > >
> > > > > > > It is not good idea to depend on boot loader so much.
> > > > > >
> > > > > > I meant "could this be programmed once when the Linux imx host
> > > > > > controller driver is probed?"  But from the below, it sounds like
> > > > > > that's not possible in general because you don't have enough stream
> > > > > > IDs to do that.
> > > > >
> > > > > Oh! sorry miss understand what your means. It is possible like what I did
> > > > > at v3 version. But I think it is not good enough.
> > > > >
> > > > > >
> > > > > > > Some hot plug devics
> > > > > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > > > > controller can work.
> > > > > > >
> > > > > > > Although we have not so much devices now,  this way give us possility to
> > > > > > > improve it in future.
> > > > > > >
> > > > > > > > It sounds like the consequences of running out of LUT entries are
> > > > > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > > > > that's possible, I think we need to figure out how to prevent the
> > > > > > > > device from being used, not just dev_warn() about it.
> > > > > > >
> > > > > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > > > > we really face such problem.
> > > > > >
> > > > > > If this controller can only support DMA from a limited number of
> > > > > > endpoints below it, I think we should figure out how to enforce that
> > > > > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > > > > something.  I'm not happy with the idea of waiting for and debugging a
> > > > > > report of data corruption.
> > > > >
> > > > > It may add a pre-add hook function to pci bridge. let me do more research.
> > > >
> > > > Hi Bjorn:
> > > >
> > > > int pci_setup_device(struct pci_dev *dev)
> > > > {
> > > > 	dev->error_state = pci_channel_io_normal;
> > > > 	...
> > > > 	pci_fixup_device(pci_fixup_early, dev);
> > > >
> > > > 	^^^ I can add fixup hook for pci_fixup_early. If not resource,
> > > > I can set dev->error_state to pci_channel_io_frozen or
> > > > pci_channel_io_perm_failure
> > > >
> > > > 	And add below check here after call hook function.
> > > >
> > > > 	if (dev->error_state != pci_channel_io_normal)
> > > > 		return -EIO;
> > > >
> > > > }
> > > >
> > > > How do you think this method? If you agree, I can continue search device
> > > > remove hook up.
> > >
> > > I think this would mean the device would not appear to be enumerated
> > > at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
> > > use even a pure programmed IO driver with no DMA or MSI?
> >
> > Make sense. Let me do more research on this.
> >
> > Frank
> > >
> > > I wonder if we should have a function pointer in struct
> > > pci_host_bridge, kind of like the existing ->map_irq(), where we could
> > > do host bridge-specific setup when enumerating a PCI device.
> 
> Consider some device may no use MSI or DMA. It'd better set LUT when
> allocate msi irq. I think insert a irq-domain in irq hierarchy.
> 
> static const struct irq_domain_ops lut_pcie_msi_domain_ops = {
>         .alloc  = lut_pcie_irq_domain_alloc,
>         .free   = lut_pcie_irq_domain_free,
> };
> 
> int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
> {
>         struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);
> 
>         pp->irq_domain = irq_domain_create_hierarchy(...)
> 
>         pp->msi_domain = pci_msi_create_irq_domain(...);
> 
>         return 0;
> }
> 
> Manage lut stream id in lut_pcie_irq_domain_alloc() and
> lut_pcie_irq_domain_free().
> 
> So failure happen only when driver use MSI and no-stream ID avaiable. It
> should be better than failure when add devices. Some devices may not use
> at all.

I'm not an IRQ expert, but it sounds plausible.  There might even be
an opportunity to fall back to INTx if there's no stream ID available
for MSI?
Manivannan Sadhasivam June 22, 2024, 4:11 a.m. UTC | #23
On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> This involves examining the msi-map and smmu-map to ensure consistent
> mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> registers are configured. In the absence of an msi-map, the built-in MSI
> controller is utilized as a fallback.
> 
> Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> controller. This function configures the correct LUT based on Device Tree
> Settings (DTS).
> 

Sorry for jumping the ship very late... But why can't you configure the LUT
during probe() itself? Anyway, you are going to use the 'iommu-map' and
'msi-map' which are static info provided in DT. I don't see a necessity to do it
during device addition time.

Qcom RC driver also uses a similar configuration in
qcom_pcie_config_sid_1_9_0().

- Mani

> Signed-off-by: Frank Li <Frank.Li@nxp.com>
> ---
>  drivers/pci/controller/dwc/pci-imx6.c | 175 +++++++++++++++++++++++++++++++++-
>  1 file changed, 174 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 29309ad0e352b..8ecc00049e20b 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -54,6 +54,22 @@
>  #define IMX95_PE0_GEN_CTRL_3			0x1058
>  #define IMX95_PCIE_LTSSM_EN			BIT(0)
>  
> +#define IMX95_PE0_LUT_ACSCTRL			0x1008
> +#define IMX95_PEO_LUT_RWA			BIT(16)
> +#define IMX95_PE0_LUT_ENLOC			GENMASK(4, 0)
> +
> +#define IMX95_PE0_LUT_DATA1			0x100c
> +#define IMX95_PE0_LUT_VLD			BIT(31)
> +#define IMX95_PE0_LUT_DAC_ID			GENMASK(10, 8)
> +#define IMX95_PE0_LUT_STREAM_ID			GENMASK(5, 0)
> +
> +#define IMX95_PE0_LUT_DATA2			0x1010
> +#define IMX95_PE0_LUT_REQID			GENMASK(31, 16)
> +#define IMX95_PE0_LUT_MASK			GENMASK(15, 0)
> +
> +#define IMX95_SID_MASK				GENMASK(5, 0)
> +#define IMX95_MAX_LUT				32
> +
>  #define to_imx_pcie(x)	dev_get_drvdata((x)->dev)
>  
>  enum imx_pcie_variants {
> @@ -79,6 +95,7 @@ enum imx_pcie_variants {
>  #define IMX_PCIE_FLAG_HAS_PHY_RESET		BIT(5)
>  #define IMX_PCIE_FLAG_HAS_SERDES		BIT(6)
>  #define IMX_PCIE_FLAG_SUPPORT_64BIT		BIT(7)
> +#define IMX_PCIE_FLAG_MONITOR_DEV		BIT(8)
>  
>  #define imx_check_flag(pci, val)     (pci->drvdata->flags & val)
>  
> @@ -132,6 +149,8 @@ struct imx_pcie {
>  	struct device		*pd_pcie_phy;
>  	struct phy		*phy;
>  	const struct imx_pcie_drvdata *drvdata;
> +
> +	struct mutex		lock;
>  };
>  
>  /* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
> @@ -215,6 +234,66 @@ static int imx95_pcie_init_phy(struct imx_pcie *imx_pcie)
>  	return 0;
>  }
>  
> +static int imx_pcie_config_lut(struct imx_pcie *imx_pcie, u16 reqid, u8 sid)
> +{
> +	struct dw_pcie *pci = imx_pcie->pci;
> +	struct device *dev = pci->dev;
> +	u32 data1, data2;
> +	int i;
> +
> +	if (sid >= 64) {
> +		dev_err(dev, "Invalid SID for index %d\n", sid);
> +		return -EINVAL;
> +	}
> +
> +	guard(mutex)(&imx_pcie->lock);
> +
> +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> +
> +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1);
> +		if (data1 & IMX95_PE0_LUT_VLD)
> +			continue;
> +
> +		data1 = FIELD_PREP(IMX95_PE0_LUT_DAC_ID, 0);
> +		data1 |= FIELD_PREP(IMX95_PE0_LUT_STREAM_ID, sid);
> +		data1 |= IMX95_PE0_LUT_VLD;
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1);
> +
> +		data2 = 0xffff;
> +		data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, reqid);
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2);
> +
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> +
> +		return 0;
> +	}
> +
> +	dev_err(dev, "All lut already used\n");
> +	return -EINVAL;
> +}
> +
> +static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 reqid)
> +{
> +	u32 data2 = 0;
> +	int i;
> +
> +	guard(mutex)(&imx_pcie->lock);
> +
> +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> +
> +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2);
> +		if (FIELD_GET(IMX95_PE0_LUT_REQID, data2) == reqid) {
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, 0);
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, 0);
> +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> +		}
> +	}
> +}
> +
>  static void imx_pcie_configure_type(struct imx_pcie *imx_pcie)
>  {
>  	const struct imx_pcie_drvdata *drvdata = imx_pcie->drvdata;
> @@ -1232,6 +1311,85 @@ static int imx_pcie_resume_noirq(struct device *dev)
>  	return 0;
>  }
>  
> +static bool imx_pcie_match_device(struct pci_bus *bus);
> +
> +static int imx_pcie_add_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> +{
> +	u32 sid_i = 0, sid_m = 0, rid = pci_dev_id(pdev);
> +	struct device *dev = imx_pcie->pci->dev;
> +	int err;
> +
> +	err = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", NULL, &sid_i);
> +	if (err)
> +		return err;
> +
> +	err = of_map_id(dev->of_node, rid, "msi-map", "msi-map-mask", NULL, &sid_m);
> +	if (err)
> +		return err;
> +
> +	if (sid_i != rid && sid_m != rid)
> +		if ((sid_i & IMX95_SID_MASK) != (sid_m & IMX95_SID_MASK)) {
> +			dev_err(dev, "its and iommu stream id miss match, please check dts file\n");
> +			return -EINVAL;
> +		}
> +
> +	/* if iommu-map is not existed then use msi-map's stream id*/
> +	if (sid_i == rid)
> +		sid_i = sid_m;
> +
> +	sid_i &= IMX95_SID_MASK;
> +
> +	if (sid_i != rid)
> +		return imx_pcie_config_lut(imx_pcie, rid, sid_i);
> +
> +	/* Use dwc built-in MSI controller */
> +	return 0;
> +}
> +
> +static void imx_pcie_del_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> +{
> +	imx_pcie_remove_lut(imx_pcie, pci_dev_id(pdev));
> +}
> +
> +
> +static int imx_pcie_bus_notifier(struct notifier_block *nb, unsigned long action, void *data)
> +{
> +	struct pci_host_bridge *host;
> +	struct imx_pcie *imx_pcie;
> +	struct pci_dev *pdev;
> +	int err;
> +
> +	pdev = to_pci_dev(data);
> +	host = pci_find_host_bridge(pdev->bus);
> +
> +	if (!imx_pcie_match_device(host->bus))
> +		return NOTIFY_OK;
> +
> +	imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(host->sysdata));
> +
> +	if (!imx_check_flag(imx_pcie, IMX_PCIE_FLAG_MONITOR_DEV))
> +		return NOTIFY_OK;
> +
> +	switch (action) {
> +	case BUS_NOTIFY_ADD_DEVICE:
> +		err = imx_pcie_add_device(imx_pcie, pdev);
> +		if (err)
> +			return notifier_from_errno(err);
> +		break;
> +	case BUS_NOTIFY_DEL_DEVICE:
> +		imx_pcie_del_device(imx_pcie, pdev);
> +		break;
> +	default:
> +		return NOTIFY_DONE;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block imx_pcie_nb = {
> +	.notifier_call = imx_pcie_bus_notifier,
> +};
> +
>  static const struct dev_pm_ops imx_pcie_pm_ops = {
>  	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_pcie_suspend_noirq,
>  				  imx_pcie_resume_noirq)
> @@ -1264,6 +1422,8 @@ static int imx_pcie_probe(struct platform_device *pdev)
>  	imx_pcie->pci = pci;
>  	imx_pcie->drvdata = of_device_get_match_data(dev);
>  
> +	mutex_init(&imx_pcie->lock);
> +
>  	/* Find the PHY if one is defined, only imx7d uses it */
>  	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
>  	if (np) {
> @@ -1551,7 +1711,8 @@ static const struct imx_pcie_drvdata drvdata[] = {
>  	},
>  	[IMX95] = {
>  		.variant = IMX95,
> -		.flags = IMX_PCIE_FLAG_HAS_SERDES,
> +		.flags = IMX_PCIE_FLAG_HAS_SERDES |
> +			 IMX_PCIE_FLAG_MONITOR_DEV,
>  		.clk_names = imx8mq_clks,
>  		.clks_cnt = ARRAY_SIZE(imx8mq_clks),
>  		.ltssm_off = IMX95_PE0_GEN_CTRL_3,
> @@ -1687,6 +1848,8 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
>  
>  static int __init imx_pcie_init(void)
>  {
> +	int ret;
> +
>  #ifdef CONFIG_ARM
>  	struct device_node *np;
>  
> @@ -1705,7 +1868,17 @@ static int __init imx_pcie_init(void)
>  	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
>  			"external abort on non-linefetch");
>  #endif
> +	ret = bus_register_notifier(&pci_bus_type, &imx_pcie_nb);
> +	if (ret)
> +		return ret;
>  
>  	return platform_driver_register(&imx_pcie_driver);
>  }
> +
> +static void __exit imx_pcie_exit(void)
> +{
> +	bus_unregister_notifier(&pci_bus_type, &imx_pcie_nb);
> +}
> +
>  device_initcall(imx_pcie_init);
> +__exitcall(imx_pcie_exit);
> 
> -- 
> 2.34.1
>
Bjorn Helgaas June 22, 2024, 5:38 p.m. UTC | #24
On Fri, Jun 21, 2024 at 05:43:21PM -0500, Bjorn Helgaas wrote:
> On Fri, Jun 21, 2024 at 06:29:48PM -0400, Frank Li wrote:
> > On Mon, Jun 17, 2024 at 10:26:36AM -0400, Frank Li wrote:
> > > On Thu, Jun 13, 2024 at 05:41:25PM -0500, Bjorn Helgaas wrote:
> > > > On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> > > > > On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > > > > > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > > > > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > > > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > > > > >
> > > > > > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > > > > > controller is utilized as a fallback.
> > > > > > > > > > > >
> > > > > > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > > > > > Settings (DTS).
> > > > > > > > > > >
> > > > > > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > > > > >
> > > > > > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > > > > > this notifier.
> > > > > > > > > >
> > > > > > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > > > > >
> > > > > > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > > > > >
> > > > > > > > Yes, it convert 16bit RID to 6bit stream id.
> > > > > > >
> > > > > > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > > > > > feature.
> > > > > >
> > > > > > Yes, it is i.MX feature. But I think other vendor should have similar
> > > > > > situation if use old arm smmu.
> > > > > >
> > > > > > >
> > > > > > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > > > > > program address-translation LUTs for inbound windows.
> > > > > > > > > >
> > > > > > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > > > > > further notifiers from running at that point - the device will still be
> > > > > > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > > > > > and at worst may break the whole system.
> > > > > > > > >
> > > > > > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > > > > > risk of running out of LUT entries?
> > > > > > > >
> > > > > > > > It is not good idea to depend on boot loader so much.
> > > > > > >
> > > > > > > I meant "could this be programmed once when the Linux imx host
> > > > > > > controller driver is probed?"  But from the below, it sounds like
> > > > > > > that's not possible in general because you don't have enough stream
> > > > > > > IDs to do that.
> > > > > >
> > > > > > Oh! sorry miss understand what your means. It is possible like what I did
> > > > > > at v3 version. But I think it is not good enough.
> > > > > >
> > > > > > >
> > > > > > > > Some hot plug devics
> > > > > > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > > > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > > > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > > > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > > > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > > > > > controller can work.
> > > > > > > >
> > > > > > > > Although we have not so much devices now,  this way give us possility to
> > > > > > > > improve it in future.
> > > > > > > >
> > > > > > > > > It sounds like the consequences of running out of LUT entries are
> > > > > > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > > > > > that's possible, I think we need to figure out how to prevent the
> > > > > > > > > device from being used, not just dev_warn() about it.
> > > > > > > >
> > > > > > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > > > > > we really face such problem.
> > > > > > >
> > > > > > > If this controller can only support DMA from a limited number of
> > > > > > > endpoints below it, I think we should figure out how to enforce that
> > > > > > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > > > > > something.  I'm not happy with the idea of waiting for and debugging a
> > > > > > > report of data corruption.
> > > > > >
> > > > > > It may add a pre-add hook function to pci bridge. let me do more research.
> > > > >
> > > > > Hi Bjorn:
> > > > >
> > > > > int pci_setup_device(struct pci_dev *dev)
> > > > > {
> > > > > 	dev->error_state = pci_channel_io_normal;
> > > > > 	...
> > > > > 	pci_fixup_device(pci_fixup_early, dev);
> > > > >
> > > > > 	^^^ I can add fixup hook for pci_fixup_early. If not resource,
> > > > > I can set dev->error_state to pci_channel_io_frozen or
> > > > > pci_channel_io_perm_failure
> > > > >
> > > > > 	And add below check here after call hook function.
> > > > >
> > > > > 	if (dev->error_state != pci_channel_io_normal)
> > > > > 		return -EIO;
> > > > >
> > > > > }
> > > > >
> > > > > How do you think this method? If you agree, I can continue search device
> > > > > remove hook up.
> > > >
> > > > I think this would mean the device would not appear to be enumerated
> > > > at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
> > > > use even a pure programmed IO driver with no DMA or MSI?
> > >
> > > Make sense. Let me do more research on this.
> > >
> > > Frank
> > > >
> > > > I wonder if we should have a function pointer in struct
> > > > pci_host_bridge, kind of like the existing ->map_irq(), where we could
> > > > do host bridge-specific setup when enumerating a PCI device.
> > 
> > Consider some device may no use MSI or DMA. It'd better set LUT when
> > allocate msi irq. I think insert a irq-domain in irq hierarchy.
> > 
> > static const struct irq_domain_ops lut_pcie_msi_domain_ops = {
> >         .alloc  = lut_pcie_irq_domain_alloc,
> >         .free   = lut_pcie_irq_domain_free,
> > };
> > 
> > int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
> > {
> >         struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);
> > 
> >         pp->irq_domain = irq_domain_create_hierarchy(...)
> > 
> >         pp->msi_domain = pci_msi_create_irq_domain(...);
> > 
> >         return 0;
> > }
> > 
> > Manage lut stream id in lut_pcie_irq_domain_alloc() and
> > lut_pcie_irq_domain_free().
> > 
> > So failure happen only when driver use MSI and no-stream ID avaiable. It
> > should be better than failure when add devices. Some devices may not use
> > at all.
> 
> I'm not an IRQ expert, but it sounds plausible.  There might even be
> an opportunity to fall back to INTx if there's no stream ID available
> for MSI?

Sorry, I think this was a half-baked thought.  Exhaustion of stream
IDs should be an uncommon situation, and the important thing is to
prevent terrible things from happening.

I don't think it's worth bending over backwards to make everything
possible limp along.  If it's easy to just make the device
inaccessible, that's fine.  If there's a simple way to make it
available but keep from enabling bus mastering, we could do that too,
but only if it's really simple.
Frank Li June 24, 2024, 3 p.m. UTC | #25
On Sat, Jun 22, 2024 at 12:38:49PM -0500, Bjorn Helgaas wrote:
> On Fri, Jun 21, 2024 at 05:43:21PM -0500, Bjorn Helgaas wrote:
> > On Fri, Jun 21, 2024 at 06:29:48PM -0400, Frank Li wrote:
> > > On Mon, Jun 17, 2024 at 10:26:36AM -0400, Frank Li wrote:
> > > > On Thu, Jun 13, 2024 at 05:41:25PM -0500, Bjorn Helgaas wrote:
> > > > > On Thu, Jun 06, 2024 at 04:24:17PM -0400, Frank Li wrote:
> > > > > > On Mon, Jun 03, 2024 at 04:07:55PM -0400, Frank Li wrote:
> > > > > > > On Mon, Jun 03, 2024 at 01:56:27PM -0500, Bjorn Helgaas wrote:
> > > > > > > > On Mon, Jun 03, 2024 at 02:42:45PM -0400, Frank Li wrote:
> > > > > > > > > On Mon, Jun 03, 2024 at 12:19:21PM -0500, Bjorn Helgaas wrote:
> > > > > > > > > > On Fri, May 31, 2024 at 03:58:49PM +0100, Robin Murphy wrote:
> > > > > > > > > > > On 2024-05-31 12:08 am, Bjorn Helgaas wrote:
> > > > > > > > > > > > [+cc IOMMU and pcie-apple.c folks for comment]
> > > > > > > > > > > >
> > > > > > > > > > > > On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > > > > > > > > > > > > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > > > > > > > > > > > > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > > > > > > > > > > > > This involves examining the msi-map and smmu-map to ensure consistent
> > > > > > > > > > > > > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > > > > > > > > > > > > registers are configured. In the absence of an msi-map, the built-in MSI
> > > > > > > > > > > > > controller is utilized as a fallback.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > > > > > > > > > > > > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > > > > > > > > > > > > controller. This function configures the correct LUT based on Device Tree
> > > > > > > > > > > > > Settings (DTS).
> > > > > > > > > > > >
> > > > > > > > > > > > This scheme is pretty similar to apple_pcie_bus_notifier().  If we
> > > > > > > > > > > > have to do this, I wish it were *more* similar, i.e., copy the
> > > > > > > > > > > > function names, bitmap tracking, code structure, etc.
> > > > > > > > > > > >
> > > > > > > > > > > > I don't really know how stream IDs work, but I assume they are used on
> > > > > > > > > > > > most or all arm64 platforms, so I'm a little surprised that of all the
> > > > > > > > > > > > PCI host drivers used on arm64, only pcie-apple.c and pci-imx6.c need
> > > > > > > > > > > > this notifier.
> > > > > > > > > > >
> > > > > > > > > > > This is one of those things that's mostly at the mercy of the PCIe root
> > > > > > > > > > > complex implementation. Typically the SMMU StreamID and/or GIC ITS DeviceID
> > > > > > > > > > > is derived directly from the PCI RID, sometimes with additional high-order
> > > > > > > > > > > bits hard-wired to disambiguate PCI segments. I believe this RID-translation
> > > > > > > > > > > LUT is a particular feature of the the Synopsys IP - I know there's also one
> > > > > > > > > > > on the NXP Layerscape platforms, but on those it's programmed by the
> > > > > > > > > > > bootloader, which also generates the appropriate "msi-map" and "iommu-map"
> > > > > > > > > > > properties to match. Ideally that's what i.MX should do as well, but hey.
> > > > > > > > > >
> > > > > > > > > > Maybe this RID-translation is a feature of i.MX, not of Synopsys?  I
> > > > > > > > > > see that the LUT CSR accesses use IMX95_* definitions.
> > > > > > > > >
> > > > > > > > > Yes, it convert 16bit RID to 6bit stream id.
> > > > > > > >
> > > > > > > > IIUC, you're saying this is not a Synopsys feature, it's an i.MX
> > > > > > > > feature.
> > > > > > >
> > > > > > > Yes, it is i.MX feature. But I think other vendor should have similar
> > > > > > > situation if use old arm smmu.
> > > > > > >
> > > > > > > >
> > > > > > > > > > > If it's really necessary to do this programming from Linux, then there's
> > > > > > > > > > > still no point in it being dynamic - the mappings cannot ever change, since
> > > > > > > > > > > the rest of the kernel believes that what the DT said at boot time was
> > > > > > > > > > > already a property of the hardware. It would be a lot more logical, and
> > > > > > > > > > > likely simpler, for the driver to just read the relevant map property and
> > > > > > > > > > > program the entire LUT to match, all in one go at controller probe time.
> > > > > > > > > > > Rather like what's already commonly done with the parsing of "dma-ranges" to
> > > > > > > > > > > program address-translation LUTs for inbound windows.
> > > > > > > > > > >
> > > > > > > > > > > Plus that would also give a chance of safely dealing with bad DTs specifying
> > > > > > > > > > > invalid ID mappings (by refusing to probe at all). As it is, returning an
> > > > > > > > > > > error from a child's BUS_NOTIFY_ADD_DEVICE does nothing except prevent any
> > > > > > > > > > > further notifiers from running at that point - the device will still be
> > > > > > > > > > > added, allowed to bind a driver, and able to start sending DMA/MSI traffic
> > > > > > > > > > > without the controller being correctly programmed, which at best won't work
> > > > > > > > > > > and at worst may break the whole system.
> > > > > > > > > >
> > > > > > > > > > Frank, could the imx LUT be programmed once at boot-time instead of at
> > > > > > > > > > device-add time?  I'm guessing maybe not because apparently there is a
> > > > > > > > > > risk of running out of LUT entries?
> > > > > > > > >
> > > > > > > > > It is not good idea to depend on boot loader so much.
> > > > > > > >
> > > > > > > > I meant "could this be programmed once when the Linux imx host
> > > > > > > > controller driver is probed?"  But from the below, it sounds like
> > > > > > > > that's not possible in general because you don't have enough stream
> > > > > > > > IDs to do that.
> > > > > > >
> > > > > > > Oh! sorry miss understand what your means. It is possible like what I did
> > > > > > > at v3 version. But I think it is not good enough.
> > > > > > >
> > > > > > > >
> > > > > > > > > Some hot plug devics
> > > > > > > > > (SD7.0) may plug after system boot. Two PCIe instances shared one set
> > > > > > > > > of 6bits stream id (total 64). Assume total 16 assign to two PCIe
> > > > > > > > > controllers. each have 8 stream id. If use uboot assign it static, each
> > > > > > > > > PCIe controller have below 8 devices.  It will be failrue one controller
> > > > > > > > > connect 7, another connect 9. but if dynamtic alloc when devices add, both
> > > > > > > > > controller can work.
> > > > > > > > >
> > > > > > > > > Although we have not so much devices now,  this way give us possility to
> > > > > > > > > improve it in future.
> > > > > > > > >
> > > > > > > > > > It sounds like the consequences of running out of LUT entries are
> > > > > > > > > > catastrophic, e.g., memory corruption from mis-directed DMA?  If
> > > > > > > > > > that's possible, I think we need to figure out how to prevent the
> > > > > > > > > > device from being used, not just dev_warn() about it.
> > > > > > > > >
> > > > > > > > > Yes, but so far, we have not met such problem now. We can improve it when
> > > > > > > > > we really face such problem.
> > > > > > > >
> > > > > > > > If this controller can only support DMA from a limited number of
> > > > > > > > endpoints below it, I think we should figure out how to enforce that
> > > > > > > > directly.  Maybe we can prevent drivers from enabling bus mastering or
> > > > > > > > something.  I'm not happy with the idea of waiting for and debugging a
> > > > > > > > report of data corruption.
> > > > > > >
> > > > > > > It may add a pre-add hook function to pci bridge. let me do more research.
> > > > > >
> > > > > > Hi Bjorn:
> > > > > >
> > > > > > int pci_setup_device(struct pci_dev *dev)
> > > > > > {
> > > > > > 	dev->error_state = pci_channel_io_normal;
> > > > > > 	...
> > > > > > 	pci_fixup_device(pci_fixup_early, dev);
> > > > > >
> > > > > > 	^^^ I can add fixup hook for pci_fixup_early. If not resource,
> > > > > > I can set dev->error_state to pci_channel_io_frozen or
> > > > > > pci_channel_io_perm_failure
> > > > > >
> > > > > > 	And add below check here after call hook function.
> > > > > >
> > > > > > 	if (dev->error_state != pci_channel_io_normal)
> > > > > > 		return -EIO;
> > > > > >
> > > > > > }
> > > > > >
> > > > > > How do you think this method? If you agree, I can continue search device
> > > > > > remove hook up.
> > > > >
> > > > > I think this would mean the device would not appear to be enumerated
> > > > > at all, right?  I.e., it wouldn't show up in lspci?  And we couldn't
> > > > > use even a pure programmed IO driver with no DMA or MSI?
> > > >
> > > > Make sense. Let me do more research on this.
> > > >
> > > > Frank
> > > > >
> > > > > I wonder if we should have a function pointer in struct
> > > > > pci_host_bridge, kind of like the existing ->map_irq(), where we could
> > > > > do host bridge-specific setup when enumerating a PCI device.
> > > 
> > > Consider some device may no use MSI or DMA. It'd better set LUT when
> > > allocate msi irq. I think insert a irq-domain in irq hierarchy.
> > > 
> > > static const struct irq_domain_ops lut_pcie_msi_domain_ops = {
> > >         .alloc  = lut_pcie_irq_domain_alloc,
> > >         .free   = lut_pcie_irq_domain_free,
> > > };
> > > 
> > > int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
> > > {
> > >         struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);
> > > 
> > >         pp->irq_domain = irq_domain_create_hierarchy(...)
> > > 
> > >         pp->msi_domain = pci_msi_create_irq_domain(...);
> > > 
> > >         return 0;
> > > }
> > > 
> > > Manage lut stream id in lut_pcie_irq_domain_alloc() and
> > > lut_pcie_irq_domain_free().
> > > 
> > > So failure happen only when driver use MSI and no-stream ID avaiable. It
> > > should be better than failure when add devices. Some devices may not use
> > > at all.
> > 
> > I'm not an IRQ expert, but it sounds plausible.  There might even be
> > an opportunity to fall back to INTx if there's no stream ID available
> > for MSI?
> 
> Sorry, I think this was a half-baked thought.  Exhaustion of stream
> IDs should be an uncommon situation, and the important thing is to
> prevent terrible things from happening.
> 
> I don't think it's worth bending over backwards to make everything
> possible limp along.  If it's easy to just make the device
> inaccessible, that's fine.  If there's a simple way to make it
> available but keep from enabling bus mastering, we could do that too,
> but only if it's really simple.

Mani mentions qcom use simple method to config all lut when probe at
qcom_pcie_config_sid_1_9_0(). It is similar with my v3 version.

https://lore.kernel.org/imx/20240402-pci2_upstream-v3-8-803414bdb430@nxp.com/

Of course both have not resolved run-out sid problems. But genenerally,
one PCIE slot only connect one devices. static alloc 8/16 sid are enough
for 99.9% user case.

best regards
Frank Li
Frank Li June 24, 2024, 3:06 p.m. UTC | #26
On Sat, Jun 22, 2024 at 09:41:15AM +0530, Manivannan Sadhasivam wrote:
> On Tue, May 28, 2024 at 03:39:21PM -0400, Frank Li wrote:
> > For the i.MX95, configuration of a LUT is necessary to convert Bus Device
> > Function (BDF) to stream IDs, which are utilized by both IOMMU and ITS.
> > This involves examining the msi-map and smmu-map to ensure consistent
> > mapping of PCI BDF to the same stream IDs. Subsequently, LUT-related
> > registers are configured. In the absence of an msi-map, the built-in MSI
> > controller is utilized as a fallback.
> > 
> > Additionally, register a PCI bus notifier to trigger imx_pcie_add_device()
> > upon the appearance of a new PCI device and when the bus is an iMX6 PCI
> > controller. This function configures the correct LUT based on Device Tree
> > Settings (DTS).
> > 
> 
> Sorry for jumping the ship very late... But why can't you configure the LUT
> during probe() itself? Anyway, you are going to use the 'iommu-map' and
> 'msi-map' which are static info provided in DT. I don't see a necessity to do it
> during device addition time.
> 
> Qcom RC driver also uses a similar configuration in
> qcom_pcie_config_sid_1_9_0().

It is similar with my v3 version.
https://lore.kernel.org/imx/20240402-pci2_upstream-v3-8-803414bdb430@nxp.com/

Rob don't like parse these property by individial driver.
https://lore.kernel.org/imx/20240429150842.GC1709920-robh@kernel.org/#t

But if use standarnd of_map_xxx() API to get sid, it needs RID information,
which only get when new PCI device added.

SID problem may take more long time to discuss. Can you help review v6
version:

https://lore.kernel.org/imx/20240617-pci2_upstream-v6-0-e0821238f997@nxp.com/T/#t

I want make bug fixes and 8qxp support to be merged firstly.

Frank

> 
> - Mani
> 
> > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > ---
> >  drivers/pci/controller/dwc/pci-imx6.c | 175 +++++++++++++++++++++++++++++++++-
> >  1 file changed, 174 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> > index 29309ad0e352b..8ecc00049e20b 100644
> > --- a/drivers/pci/controller/dwc/pci-imx6.c
> > +++ b/drivers/pci/controller/dwc/pci-imx6.c
> > @@ -54,6 +54,22 @@
> >  #define IMX95_PE0_GEN_CTRL_3			0x1058
> >  #define IMX95_PCIE_LTSSM_EN			BIT(0)
> >  
> > +#define IMX95_PE0_LUT_ACSCTRL			0x1008
> > +#define IMX95_PEO_LUT_RWA			BIT(16)
> > +#define IMX95_PE0_LUT_ENLOC			GENMASK(4, 0)
> > +
> > +#define IMX95_PE0_LUT_DATA1			0x100c
> > +#define IMX95_PE0_LUT_VLD			BIT(31)
> > +#define IMX95_PE0_LUT_DAC_ID			GENMASK(10, 8)
> > +#define IMX95_PE0_LUT_STREAM_ID			GENMASK(5, 0)
> > +
> > +#define IMX95_PE0_LUT_DATA2			0x1010
> > +#define IMX95_PE0_LUT_REQID			GENMASK(31, 16)
> > +#define IMX95_PE0_LUT_MASK			GENMASK(15, 0)
> > +
> > +#define IMX95_SID_MASK				GENMASK(5, 0)
> > +#define IMX95_MAX_LUT				32
> > +
> >  #define to_imx_pcie(x)	dev_get_drvdata((x)->dev)
> >  
> >  enum imx_pcie_variants {
> > @@ -79,6 +95,7 @@ enum imx_pcie_variants {
> >  #define IMX_PCIE_FLAG_HAS_PHY_RESET		BIT(5)
> >  #define IMX_PCIE_FLAG_HAS_SERDES		BIT(6)
> >  #define IMX_PCIE_FLAG_SUPPORT_64BIT		BIT(7)
> > +#define IMX_PCIE_FLAG_MONITOR_DEV		BIT(8)
> >  
> >  #define imx_check_flag(pci, val)     (pci->drvdata->flags & val)
> >  
> > @@ -132,6 +149,8 @@ struct imx_pcie {
> >  	struct device		*pd_pcie_phy;
> >  	struct phy		*phy;
> >  	const struct imx_pcie_drvdata *drvdata;
> > +
> > +	struct mutex		lock;
> >  };
> >  
> >  /* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
> > @@ -215,6 +234,66 @@ static int imx95_pcie_init_phy(struct imx_pcie *imx_pcie)
> >  	return 0;
> >  }
> >  
> > +static int imx_pcie_config_lut(struct imx_pcie *imx_pcie, u16 reqid, u8 sid)
> > +{
> > +	struct dw_pcie *pci = imx_pcie->pci;
> > +	struct device *dev = pci->dev;
> > +	u32 data1, data2;
> > +	int i;
> > +
> > +	if (sid >= 64) {
> > +		dev_err(dev, "Invalid SID for index %d\n", sid);
> > +		return -EINVAL;
> > +	}
> > +
> > +	guard(mutex)(&imx_pcie->lock);
> > +
> > +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> > +
> > +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1);
> > +		if (data1 & IMX95_PE0_LUT_VLD)
> > +			continue;
> > +
> > +		data1 = FIELD_PREP(IMX95_PE0_LUT_DAC_ID, 0);
> > +		data1 |= FIELD_PREP(IMX95_PE0_LUT_STREAM_ID, sid);
> > +		data1 |= IMX95_PE0_LUT_VLD;
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1);
> > +
> > +		data2 = 0xffff;
> > +		data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, reqid);
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2);
> > +
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> > +
> > +		return 0;
> > +	}
> > +
> > +	dev_err(dev, "All lut already used\n");
> > +	return -EINVAL;
> > +}
> > +
> > +static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 reqid)
> > +{
> > +	u32 data2 = 0;
> > +	int i;
> > +
> > +	guard(mutex)(&imx_pcie->lock);
> > +
> > +	for (i = 0; i < IMX95_MAX_LUT; i++) {
> > +		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
> > +
> > +		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2);
> > +		if (FIELD_GET(IMX95_PE0_LUT_REQID, data2) == reqid) {
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, 0);
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, 0);
> > +			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
> > +		}
> > +	}
> > +}
> > +
> >  static void imx_pcie_configure_type(struct imx_pcie *imx_pcie)
> >  {
> >  	const struct imx_pcie_drvdata *drvdata = imx_pcie->drvdata;
> > @@ -1232,6 +1311,85 @@ static int imx_pcie_resume_noirq(struct device *dev)
> >  	return 0;
> >  }
> >  
> > +static bool imx_pcie_match_device(struct pci_bus *bus);
> > +
> > +static int imx_pcie_add_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> > +{
> > +	u32 sid_i = 0, sid_m = 0, rid = pci_dev_id(pdev);
> > +	struct device *dev = imx_pcie->pci->dev;
> > +	int err;
> > +
> > +	err = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", NULL, &sid_i);
> > +	if (err)
> > +		return err;
> > +
> > +	err = of_map_id(dev->of_node, rid, "msi-map", "msi-map-mask", NULL, &sid_m);
> > +	if (err)
> > +		return err;
> > +
> > +	if (sid_i != rid && sid_m != rid)
> > +		if ((sid_i & IMX95_SID_MASK) != (sid_m & IMX95_SID_MASK)) {
> > +			dev_err(dev, "its and iommu stream id miss match, please check dts file\n");
> > +			return -EINVAL;
> > +		}
> > +
> > +	/* if iommu-map is not existed then use msi-map's stream id*/
> > +	if (sid_i == rid)
> > +		sid_i = sid_m;
> > +
> > +	sid_i &= IMX95_SID_MASK;
> > +
> > +	if (sid_i != rid)
> > +		return imx_pcie_config_lut(imx_pcie, rid, sid_i);
> > +
> > +	/* Use dwc built-in MSI controller */
> > +	return 0;
> > +}
> > +
> > +static void imx_pcie_del_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
> > +{
> > +	imx_pcie_remove_lut(imx_pcie, pci_dev_id(pdev));
> > +}
> > +
> > +
> > +static int imx_pcie_bus_notifier(struct notifier_block *nb, unsigned long action, void *data)
> > +{
> > +	struct pci_host_bridge *host;
> > +	struct imx_pcie *imx_pcie;
> > +	struct pci_dev *pdev;
> > +	int err;
> > +
> > +	pdev = to_pci_dev(data);
> > +	host = pci_find_host_bridge(pdev->bus);
> > +
> > +	if (!imx_pcie_match_device(host->bus))
> > +		return NOTIFY_OK;
> > +
> > +	imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(host->sysdata));
> > +
> > +	if (!imx_check_flag(imx_pcie, IMX_PCIE_FLAG_MONITOR_DEV))
> > +		return NOTIFY_OK;
> > +
> > +	switch (action) {
> > +	case BUS_NOTIFY_ADD_DEVICE:
> > +		err = imx_pcie_add_device(imx_pcie, pdev);
> > +		if (err)
> > +			return notifier_from_errno(err);
> > +		break;
> > +	case BUS_NOTIFY_DEL_DEVICE:
> > +		imx_pcie_del_device(imx_pcie, pdev);
> > +		break;
> > +	default:
> > +		return NOTIFY_DONE;
> > +	}
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block imx_pcie_nb = {
> > +	.notifier_call = imx_pcie_bus_notifier,
> > +};
> > +
> >  static const struct dev_pm_ops imx_pcie_pm_ops = {
> >  	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_pcie_suspend_noirq,
> >  				  imx_pcie_resume_noirq)
> > @@ -1264,6 +1422,8 @@ static int imx_pcie_probe(struct platform_device *pdev)
> >  	imx_pcie->pci = pci;
> >  	imx_pcie->drvdata = of_device_get_match_data(dev);
> >  
> > +	mutex_init(&imx_pcie->lock);
> > +
> >  	/* Find the PHY if one is defined, only imx7d uses it */
> >  	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
> >  	if (np) {
> > @@ -1551,7 +1711,8 @@ static const struct imx_pcie_drvdata drvdata[] = {
> >  	},
> >  	[IMX95] = {
> >  		.variant = IMX95,
> > -		.flags = IMX_PCIE_FLAG_HAS_SERDES,
> > +		.flags = IMX_PCIE_FLAG_HAS_SERDES |
> > +			 IMX_PCIE_FLAG_MONITOR_DEV,
> >  		.clk_names = imx8mq_clks,
> >  		.clks_cnt = ARRAY_SIZE(imx8mq_clks),
> >  		.ltssm_off = IMX95_PE0_GEN_CTRL_3,
> > @@ -1687,6 +1848,8 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
> >  
> >  static int __init imx_pcie_init(void)
> >  {
> > +	int ret;
> > +
> >  #ifdef CONFIG_ARM
> >  	struct device_node *np;
> >  
> > @@ -1705,7 +1868,17 @@ static int __init imx_pcie_init(void)
> >  	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
> >  			"external abort on non-linefetch");
> >  #endif
> > +	ret = bus_register_notifier(&pci_bus_type, &imx_pcie_nb);
> > +	if (ret)
> > +		return ret;
> >  
> >  	return platform_driver_register(&imx_pcie_driver);
> >  }
> > +
> > +static void __exit imx_pcie_exit(void)
> > +{
> > +	bus_unregister_notifier(&pci_bus_type, &imx_pcie_nb);
> > +}
> > +
> >  device_initcall(imx_pcie_init);
> > +__exitcall(imx_pcie_exit);
> > 
> > -- 
> > 2.34.1
> > 
> 
> -- 
> மணிவண்ணன் சதாசிவம்
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 29309ad0e352b..8ecc00049e20b 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -54,6 +54,22 @@ 
 #define IMX95_PE0_GEN_CTRL_3			0x1058
 #define IMX95_PCIE_LTSSM_EN			BIT(0)
 
+#define IMX95_PE0_LUT_ACSCTRL			0x1008
+#define IMX95_PEO_LUT_RWA			BIT(16)
+#define IMX95_PE0_LUT_ENLOC			GENMASK(4, 0)
+
+#define IMX95_PE0_LUT_DATA1			0x100c
+#define IMX95_PE0_LUT_VLD			BIT(31)
+#define IMX95_PE0_LUT_DAC_ID			GENMASK(10, 8)
+#define IMX95_PE0_LUT_STREAM_ID			GENMASK(5, 0)
+
+#define IMX95_PE0_LUT_DATA2			0x1010
+#define IMX95_PE0_LUT_REQID			GENMASK(31, 16)
+#define IMX95_PE0_LUT_MASK			GENMASK(15, 0)
+
+#define IMX95_SID_MASK				GENMASK(5, 0)
+#define IMX95_MAX_LUT				32
+
 #define to_imx_pcie(x)	dev_get_drvdata((x)->dev)
 
 enum imx_pcie_variants {
@@ -79,6 +95,7 @@  enum imx_pcie_variants {
 #define IMX_PCIE_FLAG_HAS_PHY_RESET		BIT(5)
 #define IMX_PCIE_FLAG_HAS_SERDES		BIT(6)
 #define IMX_PCIE_FLAG_SUPPORT_64BIT		BIT(7)
+#define IMX_PCIE_FLAG_MONITOR_DEV		BIT(8)
 
 #define imx_check_flag(pci, val)     (pci->drvdata->flags & val)
 
@@ -132,6 +149,8 @@  struct imx_pcie {
 	struct device		*pd_pcie_phy;
 	struct phy		*phy;
 	const struct imx_pcie_drvdata *drvdata;
+
+	struct mutex		lock;
 };
 
 /* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
@@ -215,6 +234,66 @@  static int imx95_pcie_init_phy(struct imx_pcie *imx_pcie)
 	return 0;
 }
 
+static int imx_pcie_config_lut(struct imx_pcie *imx_pcie, u16 reqid, u8 sid)
+{
+	struct dw_pcie *pci = imx_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 data1, data2;
+	int i;
+
+	if (sid >= 64) {
+		dev_err(dev, "Invalid SID for index %d\n", sid);
+		return -EINVAL;
+	}
+
+	guard(mutex)(&imx_pcie->lock);
+
+	for (i = 0; i < IMX95_MAX_LUT; i++) {
+		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
+
+		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1);
+		if (data1 & IMX95_PE0_LUT_VLD)
+			continue;
+
+		data1 = FIELD_PREP(IMX95_PE0_LUT_DAC_ID, 0);
+		data1 |= FIELD_PREP(IMX95_PE0_LUT_STREAM_ID, sid);
+		data1 |= IMX95_PE0_LUT_VLD;
+
+		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, data1);
+
+		data2 = 0xffff;
+		data2 |= FIELD_PREP(IMX95_PE0_LUT_REQID, reqid);
+
+		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, data2);
+
+		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
+
+		return 0;
+	}
+
+	dev_err(dev, "All lut already used\n");
+	return -EINVAL;
+}
+
+static void imx_pcie_remove_lut(struct imx_pcie *imx_pcie, u16 reqid)
+{
+	u32 data2 = 0;
+	int i;
+
+	guard(mutex)(&imx_pcie->lock);
+
+	for (i = 0; i < IMX95_MAX_LUT; i++) {
+		regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, IMX95_PEO_LUT_RWA | i);
+
+		regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2);
+		if (FIELD_GET(IMX95_PE0_LUT_REQID, data2) == reqid) {
+			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, 0);
+			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, 0);
+			regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i);
+		}
+	}
+}
+
 static void imx_pcie_configure_type(struct imx_pcie *imx_pcie)
 {
 	const struct imx_pcie_drvdata *drvdata = imx_pcie->drvdata;
@@ -1232,6 +1311,85 @@  static int imx_pcie_resume_noirq(struct device *dev)
 	return 0;
 }
 
+static bool imx_pcie_match_device(struct pci_bus *bus);
+
+static int imx_pcie_add_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
+{
+	u32 sid_i = 0, sid_m = 0, rid = pci_dev_id(pdev);
+	struct device *dev = imx_pcie->pci->dev;
+	int err;
+
+	err = of_map_id(dev->of_node, rid, "iommu-map", "iommu-map-mask", NULL, &sid_i);
+	if (err)
+		return err;
+
+	err = of_map_id(dev->of_node, rid, "msi-map", "msi-map-mask", NULL, &sid_m);
+	if (err)
+		return err;
+
+	if (sid_i != rid && sid_m != rid)
+		if ((sid_i & IMX95_SID_MASK) != (sid_m & IMX95_SID_MASK)) {
+			dev_err(dev, "its and iommu stream id miss match, please check dts file\n");
+			return -EINVAL;
+		}
+
+	/* if iommu-map is not existed then use msi-map's stream id*/
+	if (sid_i == rid)
+		sid_i = sid_m;
+
+	sid_i &= IMX95_SID_MASK;
+
+	if (sid_i != rid)
+		return imx_pcie_config_lut(imx_pcie, rid, sid_i);
+
+	/* Use dwc built-in MSI controller */
+	return 0;
+}
+
+static void imx_pcie_del_device(struct imx_pcie *imx_pcie, struct pci_dev *pdev)
+{
+	imx_pcie_remove_lut(imx_pcie, pci_dev_id(pdev));
+}
+
+
+static int imx_pcie_bus_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct pci_host_bridge *host;
+	struct imx_pcie *imx_pcie;
+	struct pci_dev *pdev;
+	int err;
+
+	pdev = to_pci_dev(data);
+	host = pci_find_host_bridge(pdev->bus);
+
+	if (!imx_pcie_match_device(host->bus))
+		return NOTIFY_OK;
+
+	imx_pcie = to_imx_pcie(to_dw_pcie_from_pp(host->sysdata));
+
+	if (!imx_check_flag(imx_pcie, IMX_PCIE_FLAG_MONITOR_DEV))
+		return NOTIFY_OK;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		err = imx_pcie_add_device(imx_pcie, pdev);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		imx_pcie_del_device(imx_pcie, pdev);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block imx_pcie_nb = {
+	.notifier_call = imx_pcie_bus_notifier,
+};
+
 static const struct dev_pm_ops imx_pcie_pm_ops = {
 	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_pcie_suspend_noirq,
 				  imx_pcie_resume_noirq)
@@ -1264,6 +1422,8 @@  static int imx_pcie_probe(struct platform_device *pdev)
 	imx_pcie->pci = pci;
 	imx_pcie->drvdata = of_device_get_match_data(dev);
 
+	mutex_init(&imx_pcie->lock);
+
 	/* Find the PHY if one is defined, only imx7d uses it */
 	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
 	if (np) {
@@ -1551,7 +1711,8 @@  static const struct imx_pcie_drvdata drvdata[] = {
 	},
 	[IMX95] = {
 		.variant = IMX95,
-		.flags = IMX_PCIE_FLAG_HAS_SERDES,
+		.flags = IMX_PCIE_FLAG_HAS_SERDES |
+			 IMX_PCIE_FLAG_MONITOR_DEV,
 		.clk_names = imx8mq_clks,
 		.clks_cnt = ARRAY_SIZE(imx8mq_clks),
 		.ltssm_off = IMX95_PE0_GEN_CTRL_3,
@@ -1687,6 +1848,8 @@  DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
 
 static int __init imx_pcie_init(void)
 {
+	int ret;
+
 #ifdef CONFIG_ARM
 	struct device_node *np;
 
@@ -1705,7 +1868,17 @@  static int __init imx_pcie_init(void)
 	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
 			"external abort on non-linefetch");
 #endif
+	ret = bus_register_notifier(&pci_bus_type, &imx_pcie_nb);
+	if (ret)
+		return ret;
 
 	return platform_driver_register(&imx_pcie_driver);
 }
+
+static void __exit imx_pcie_exit(void)
+{
+	bus_unregister_notifier(&pci_bus_type, &imx_pcie_nb);
+}
+
 device_initcall(imx_pcie_init);
+__exitcall(imx_pcie_exit);