diff mbox series

[V3,XRT,Alveo,07/18] fpga: xrt: management physical function driver (root)

Message ID 20210218064019.29189-8-lizhih@xilinx.com (mailing list archive)
State Superseded, archived
Headers show
Series XRT Alveo driver overview | expand

Commit Message

Lizhi Hou Feb. 18, 2021, 6:40 a.m. UTC
The PCIE device driver which attaches to management function on Alveo
devices. It instantiates one or more partition drivers which in turn
instantiate platform drivers. The instantiation of partition and platform
drivers is completely data driven.

Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
Signed-off-by: Max Zhen <max.zhen@xilinx.com>
Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
---
 drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
 drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
 2 files changed, 456 insertions(+)
 create mode 100644 drivers/fpga/xrt/include/xroot.h
 create mode 100644 drivers/fpga/xrt/mgmt/root.c

Comments

Tom Rix Feb. 26, 2021, 3:01 p.m. UTC | #1
A question i do not know the answer to.

Seems like 'golden' is linked to a manufacturing (diagnostics?) image.

If the public will never see it, should handling it here be done ?

Moritz, do you know ?


On 2/17/21 10:40 PM, Lizhi Hou wrote:
> The PCIE device driver which attaches to management function on Alveo
to the management
> devices. It instantiates one or more partition drivers which in turn
more fpga partition / group ?
> instantiate platform drivers. The instantiation of partition and platform
> drivers is completely data driven.
data driven ? everything is data driven.  do you mean dtb driven ?
>
> Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
> Signed-off-by: Max Zhen <max.zhen@xilinx.com>
> Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
> ---
>  drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
>  drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
>  2 files changed, 456 insertions(+)
>  create mode 100644 drivers/fpga/xrt/include/xroot.h
>  create mode 100644 drivers/fpga/xrt/mgmt/root.c
>
> diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
> new file mode 100644
> index 000000000000..752e10daa85e
> --- /dev/null
> +++ b/drivers/fpga/xrt/include/xroot.h
> @@ -0,0 +1,114 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Header file for Xilinx Runtime (XRT) driver
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + *	Cheng Zhen <maxz@xilinx.com>
> + */
> +
> +#ifndef _XRT_ROOT_H_
> +#define _XRT_ROOT_H_
> +
> +#include <linux/pci.h>
> +#include "subdev_id.h"
> +#include "events.h"
> +
> +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
> +	struct platform_device *, void *);
> +#define XRT_SUBDEV_MATCH_PREV	((xrt_subdev_match_t)-1)
> +#define XRT_SUBDEV_MATCH_NEXT	((xrt_subdev_match_t)-2)
> +
> +/*
> + * Root IOCTL calls.
> + */
> +enum xrt_root_ioctl_cmd {
> +	/* Leaf actions. */
> +	XRT_ROOT_GET_LEAF = 0,
> +	XRT_ROOT_PUT_LEAF,
> +	XRT_ROOT_GET_LEAF_HOLDERS,
> +
> +	/* Group actions. */
> +	XRT_ROOT_CREATE_GROUP,
> +	XRT_ROOT_REMOVE_GROUP,
> +	XRT_ROOT_LOOKUP_GROUP,
> +	XRT_ROOT_WAIT_GROUP_BRINGUP,
> +
> +	/* Event actions. */
> +	XRT_ROOT_EVENT,
should this be XRT_ROOT_EVENT_SYNC ?
> +	XRT_ROOT_EVENT_ASYNC,
> +
> +	/* Device info. */
> +	XRT_ROOT_GET_RESOURCE,
> +	XRT_ROOT_GET_ID,
> +
> +	/* Misc. */
> +	XRT_ROOT_HOT_RESET,
> +	XRT_ROOT_HWMON,
> +};
> +
> +struct xrt_root_ioctl_get_leaf {
> +	struct platform_device *xpigl_pdev; /* caller's pdev */
xpigl_ ? unneeded suffix in element names
> +	xrt_subdev_match_t xpigl_match_cb;
> +	void *xpigl_match_arg;
> +	struct platform_device *xpigl_leaf; /* target leaf pdev */
> +};
> +
> +struct xrt_root_ioctl_put_leaf {
> +	struct platform_device *xpipl_pdev; /* caller's pdev */
> +	struct platform_device *xpipl_leaf; /* target's pdev */

caller_pdev;

target_pdev;

> +};
> +
> +struct xrt_root_ioctl_lookup_group {
> +	struct platform_device *xpilp_pdev; /* caller's pdev */
> +	xrt_subdev_match_t xpilp_match_cb;
> +	void *xpilp_match_arg;
> +	int xpilp_grp_inst;
> +};
> +
> +struct xrt_root_ioctl_get_holders {
> +	struct platform_device *xpigh_pdev; /* caller's pdev */
> +	char *xpigh_holder_buf;
> +	size_t xpigh_holder_buf_len;
> +};
> +
> +struct xrt_root_ioctl_get_res {
> +	struct resource *xpigr_res;
> +};
> +
> +struct xrt_root_ioctl_get_id {
> +	unsigned short  xpigi_vendor_id;
> +	unsigned short  xpigi_device_id;
> +	unsigned short  xpigi_sub_vendor_id;
> +	unsigned short  xpigi_sub_device_id;
> +};
> +
> +struct xrt_root_ioctl_hwmon {
> +	bool xpih_register;
> +	const char *xpih_name;
> +	void *xpih_drvdata;
> +	const struct attribute_group **xpih_groups;
> +	struct device *xpih_hwmon_dev;
> +};
> +
> +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
This function pointer type is important, please add a comment about its use and expected parameters
> +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
> +
> +/*
> + * Defines physical function (MPF / UPF) specific operations
> + * needed in common root driver.
> + */
> +struct xroot_pf_cb {
> +	void (*xpc_hot_reset)(struct pci_dev *pdev);
This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed ?
> +};
> +
> +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
> +void xroot_remove(void *root);
> +bool xroot_wait_for_bringup(void *root);
> +int xroot_add_vsec_node(void *root, char *dtb);
> +int xroot_create_group(void *xr, char *dtb);
> +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
> +void xroot_broadcast(void *root, enum xrt_events evt);
> +
> +#endif	/* _XRT_ROOT_H_ */
> diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
> new file mode 100644
> index 000000000000..583a37c9d30c
> --- /dev/null
> +++ b/drivers/fpga/xrt/mgmt/root.c
> @@ -0,0 +1,342 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Xilinx Alveo Management Function Driver
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + *	Cheng Zhen <maxz@xilinx.com>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/aer.h>
> +#include <linux/vmalloc.h>
> +#include <linux/delay.h>
> +
> +#include "xroot.h"
> +#include "main-impl.h"
> +#include "metadata.h"
> +
> +#define XMGMT_MODULE_NAME	"xmgmt"
The xrt modules would be more easily identified with a 'xrt' prefix instead of 'x'
> +#define XMGMT_DRIVER_VERSION	"4.0.0"
> +
> +#define XMGMT_PDEV(xm)		((xm)->pdev)
> +#define XMGMT_DEV(xm)		(&(XMGMT_PDEV(xm)->dev))
> +#define xmgmt_err(xm, fmt, args...)	\
> +	dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> +#define xmgmt_warn(xm, fmt, args...)	\
> +	dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> +#define xmgmt_info(xm, fmt, args...)	\
> +	dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> +#define xmgmt_dbg(xm, fmt, args...)	\
> +	dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> +#define XMGMT_DEV_ID(_pcidev)			\
> +	({ typeof(_pcidev) (pcidev) = (_pcidev);	\
> +	((pci_domain_nr((pcidev)->bus) << 16) |	\
> +	PCI_DEVID((pcidev)->bus->number, 0)); })
> +
> +static struct class *xmgmt_class;
> +static const struct pci_device_id xmgmt_pci_ids[] = {
> +	{ PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
> +	{ PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */

demagic this table, look at dfl-pci for how to use existing #define for the vendor and create a new on for the device.  If there are vf's add them at the same time.

What is a golden image ?

> +	{ 0, }
> +};
> +
> +struct xmgmt {
> +	struct pci_dev *pdev;
> +	void *root;
> +
> +	bool ready;
> +};
> +
> +static int xmgmt_config_pci(struct xmgmt *xm)
> +{
> +	struct pci_dev *pdev = XMGMT_PDEV(xm);
> +	int rc;
> +
> +	rc = pcim_enable_device(pdev);
> +	if (rc < 0) {
> +		xmgmt_err(xm, "failed to enable device: %d", rc);
> +		return rc;
> +	}
> +
> +	rc = pci_enable_pcie_error_reporting(pdev);
> +	if (rc)
> +		xmgmt_warn(xm, "failed to enable AER: %d", rc);
> +
> +	pci_set_master(pdev);
> +
> +	rc = pcie_get_readrq(pdev);
Review this call, it does not go negative
> +	if (rc < 0) {
> +		xmgmt_err(xm, "failed to read mrrs %d", rc);
> +		return rc;
> +	}
this is a quirk, add a comment.
> +	if (rc > 512) {
> +		rc = pcie_set_readrq(pdev, 512);
> +		if (rc) {
> +			xmgmt_err(xm, "failed to force mrrs %d", rc);
similar calls do not fail here.
> +			return rc;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
> +{
> +	struct xmgmt *xm = data;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> +		pci_cfg_access_lock(pdev);
> +		pci_save_state(pdev);
> +	}
> +
> +	return 0;
> +}
> +
> +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
> +{
> +	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);

This is a bus call, not a device call.

Can this be changed into something like what hot reset does ?

> +}
> +
> +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
> +{
> +	struct xmgmt *xm = data;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> +		pci_restore_state(pdev);
> +		pci_cfg_access_unlock(pdev);
> +	}
> +
> +	return 0;
> +}
> +
> +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
> +{
> +	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
> +}
> +
> +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
> +{
> +	struct xmgmt *xm = pci_get_drvdata(pdev);
> +	struct pci_bus *bus;
> +	u8 pci_bctl;
> +	u16 pci_cmd, devctl;
> +	int i, ret;
> +
> +	xmgmt_info(xm, "hot reset start");
> +
> +	xmgmt_pci_save_config_all(xm);
> +
> +	pci_disable_device(pdev);
> +
> +	bus = pdev->bus;
> +
> +	/*
> +	 * When flipping the SBR bit, device can fall off the bus. This is
> +	 * usually no problem at all so long as drivers are working properly
> +	 * after SBR. However, some systems complain bitterly when the device
> +	 * falls off the bus.
> +	 * The quick solution is to temporarily disable the SERR reporting of
> +	 * switch port during SBR.
> +	 */
> +
> +	pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
> +	pci_write_config_word(bus->self, PCI_COMMAND,
> +			      (pci_cmd & ~PCI_COMMAND_SERR));
> +	pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
> +	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
> +				   (devctl & ~PCI_EXP_DEVCTL_FERE));
> +	pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
> +	pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
> +	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);

how the pci config values are set and cleared should be consistent.

this call should be

pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )

and the next &= avoided

> +
> +	msleep(100);
> +	pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
> +	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> +	ssleep(1);
> +
> +	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
> +	pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
> +
> +	ret = pci_enable_device(pdev);
> +	if (ret)
> +		xmgmt_err(xm, "failed to enable device, ret %d", ret);
> +
> +	for (i = 0; i < 300; i++) {
> +		pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
> +		if (pci_cmd != 0xffff)
what happens with i == 300 and pci_cmd is still 0xffff ?
> +			break;
> +		msleep(20);
> +	}
> +
> +	xmgmt_info(xm, "waiting for %d ms", i * 20);
> +	xmgmt_pci_restore_config_all(xm);
> +	xmgmt_config_pci(xm);
> +}
> +
> +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
> +{
> +	char *dtb = NULL;
> +	int ret;
> +
> +	ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
> +	if (ret) {
> +		xmgmt_err(xm, "create metadata failed, ret %d", ret);
> +		goto failed;
> +	}
> +
> +	ret = xroot_add_vsec_node(xm->root, dtb);
> +	if (ret == -ENOENT) {
> +		/*
> +		 * We may be dealing with a MFG board.
> +		 * Try vsec-golden which will bring up all hard-coded leaves
> +		 * at hard-coded offsets.
> +		 */
> +		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);

Manufacturing diagnostics ?

Tom

> +	} else if (ret == 0) {
> +		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
> +	}
> +	if (ret)
> +		goto failed;
> +
> +	*root_dtb = dtb;
> +	return 0;
> +
> +failed:
> +	vfree(dtb);
> +	return ret;
> +}
> +
> +static ssize_t ready_show(struct device *dev,
> +			  struct device_attribute *da,
> +			  char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct xmgmt *xm = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%d\n", xm->ready);
> +}
> +static DEVICE_ATTR_RO(ready);
> +
> +static struct attribute *xmgmt_root_attrs[] = {
> +	&dev_attr_ready.attr,
> +	NULL
> +};
> +
> +static struct attribute_group xmgmt_root_attr_group = {
> +	.attrs = xmgmt_root_attrs,
> +};
> +
> +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
> +	.xpc_hot_reset = xmgmt_root_hot_reset,
> +};
> +
> +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> +	int ret;
> +	struct device *dev = &pdev->dev;
> +	struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
> +	char *dtb = NULL;
> +
> +	if (!xm)
> +		return -ENOMEM;
> +	xm->pdev = pdev;
> +	pci_set_drvdata(pdev, xm);
> +
> +	ret = xmgmt_config_pci(xm);
> +	if (ret)
> +		goto failed;
> +
> +	ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
> +	if (ret)
> +		goto failed;
> +
> +	ret = xmgmt_create_root_metadata(xm, &dtb);
> +	if (ret)
> +		goto failed_metadata;
> +
> +	ret = xroot_create_group(xm->root, dtb);
> +	vfree(dtb);
> +	if (ret)
> +		xmgmt_err(xm, "failed to create root group: %d", ret);
> +
> +	if (!xroot_wait_for_bringup(xm->root))
> +		xmgmt_err(xm, "failed to bringup all groups");
> +	else
> +		xm->ready = true;
> +
> +	ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> +	if (ret) {
> +		/* Warning instead of failing the probe. */
> +		xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
> +	}
> +
> +	xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
> +	xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
> +	return 0;
> +
> +failed_metadata:
> +	(void)xroot_remove(xm->root);
> +failed:
> +	pci_set_drvdata(pdev, NULL);
> +	return ret;
> +}
> +
> +static void xmgmt_remove(struct pci_dev *pdev)
> +{
> +	struct xmgmt *xm = pci_get_drvdata(pdev);
> +
> +	xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
> +	sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> +	(void)xroot_remove(xm->root);
> +	pci_disable_pcie_error_reporting(xm->pdev);
> +	xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
> +}
> +
> +static struct pci_driver xmgmt_driver = {
> +	.name = XMGMT_MODULE_NAME,
> +	.id_table = xmgmt_pci_ids,
> +	.probe = xmgmt_probe,
> +	.remove = xmgmt_remove,
> +};
> +
> +static int __init xmgmt_init(void)
> +{
> +	int res = 0;
> +
> +	res = xmgmt_main_register_leaf();
> +	if (res)
> +		return res;
> +
> +	xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
> +	if (IS_ERR(xmgmt_class))
> +		return PTR_ERR(xmgmt_class);
> +
> +	res = pci_register_driver(&xmgmt_driver);
> +	if (res) {
> +		class_destroy(xmgmt_class);
> +		return res;
> +	}
> +
> +	return 0;
> +}
> +
> +static __exit void xmgmt_exit(void)
> +{
> +	pci_unregister_driver(&xmgmt_driver);
> +	class_destroy(xmgmt_class);
> +	xmgmt_main_unregister_leaf();
> +}
> +
> +module_init(xmgmt_init);
> +module_exit(xmgmt_exit);
> +
> +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
> +MODULE_VERSION(XMGMT_DRIVER_VERSION);
> +MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
> +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
> +MODULE_LICENSE("GPL v2");
Moritz Fischer Feb. 26, 2021, 5:56 p.m. UTC | #2
On Fri, Feb 26, 2021 at 07:01:05AM -0800, Tom Rix wrote:
> A question i do not know the answer to.
> 
> Seems like 'golden' is linked to a manufacturing (diagnostics?) image.

From my brief history with Xilinx Ultrascale+ PCI cards I recall the golden
image being a sort of known good recovery image.

If we can't tell it should probably be explained better :)
> 
> If the public will never see it, should handling it here be done ?

Yes. We do want people to run their entire stack using mainline linux,
not just a part of it, if code is needed to get from recovery image to
full image or similar, then we should support that.

> Moritz, do you know ?
> 
> 
> On 2/17/21 10:40 PM, Lizhi Hou wrote:
> > The PCIE device driver which attaches to management function on Alveo
> to the management
> > devices. It instantiates one or more partition drivers which in turn
> more fpga partition / group ?
> > instantiate platform drivers. The instantiation of partition and platform
> > drivers is completely data driven.
> data driven ? everything is data driven.  do you mean dtb driven ?
> >
> > Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
> > Signed-off-by: Max Zhen <max.zhen@xilinx.com>
> > Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
> > ---
> >  drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
> >  drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
> >  2 files changed, 456 insertions(+)
> >  create mode 100644 drivers/fpga/xrt/include/xroot.h
> >  create mode 100644 drivers/fpga/xrt/mgmt/root.c
> >
> > diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
> > new file mode 100644
> > index 000000000000..752e10daa85e
> > --- /dev/null
> > +++ b/drivers/fpga/xrt/include/xroot.h
> > @@ -0,0 +1,114 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Header file for Xilinx Runtime (XRT) driver
> > + *
> > + * Copyright (C) 2020-2021 Xilinx, Inc.
> > + *
> > + * Authors:
> > + *	Cheng Zhen <maxz@xilinx.com>
> > + */
> > +
> > +#ifndef _XRT_ROOT_H_
> > +#define _XRT_ROOT_H_
> > +
> > +#include <linux/pci.h>
> > +#include "subdev_id.h"
> > +#include "events.h"
> > +
> > +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
> > +	struct platform_device *, void *);
> > +#define XRT_SUBDEV_MATCH_PREV	((xrt_subdev_match_t)-1)
> > +#define XRT_SUBDEV_MATCH_NEXT	((xrt_subdev_match_t)-2)
> > +
> > +/*
> > + * Root IOCTL calls.
> > + */
> > +enum xrt_root_ioctl_cmd {
> > +	/* Leaf actions. */
> > +	XRT_ROOT_GET_LEAF = 0,
> > +	XRT_ROOT_PUT_LEAF,
> > +	XRT_ROOT_GET_LEAF_HOLDERS,
> > +
> > +	/* Group actions. */
> > +	XRT_ROOT_CREATE_GROUP,
> > +	XRT_ROOT_REMOVE_GROUP,
> > +	XRT_ROOT_LOOKUP_GROUP,
> > +	XRT_ROOT_WAIT_GROUP_BRINGUP,
> > +
> > +	/* Event actions. */
> > +	XRT_ROOT_EVENT,
> should this be XRT_ROOT_EVENT_SYNC ?
> > +	XRT_ROOT_EVENT_ASYNC,
> > +
> > +	/* Device info. */
> > +	XRT_ROOT_GET_RESOURCE,
> > +	XRT_ROOT_GET_ID,
> > +
> > +	/* Misc. */
> > +	XRT_ROOT_HOT_RESET,
> > +	XRT_ROOT_HWMON,
> > +};
> > +
> > +struct xrt_root_ioctl_get_leaf {
> > +	struct platform_device *xpigl_pdev; /* caller's pdev */
> xpigl_ ? unneeded suffix in element names
> > +	xrt_subdev_match_t xpigl_match_cb;
> > +	void *xpigl_match_arg;
> > +	struct platform_device *xpigl_leaf; /* target leaf pdev */
> > +};
> > +
> > +struct xrt_root_ioctl_put_leaf {
> > +	struct platform_device *xpipl_pdev; /* caller's pdev */
> > +	struct platform_device *xpipl_leaf; /* target's pdev */
> 
> caller_pdev;
> 
> target_pdev;
> 
> > +};
> > +
> > +struct xrt_root_ioctl_lookup_group {
> > +	struct platform_device *xpilp_pdev; /* caller's pdev */
> > +	xrt_subdev_match_t xpilp_match_cb;
> > +	void *xpilp_match_arg;
> > +	int xpilp_grp_inst;
> > +};
> > +
> > +struct xrt_root_ioctl_get_holders {
> > +	struct platform_device *xpigh_pdev; /* caller's pdev */
> > +	char *xpigh_holder_buf;
> > +	size_t xpigh_holder_buf_len;
> > +};
> > +
> > +struct xrt_root_ioctl_get_res {
> > +	struct resource *xpigr_res;
> > +};
> > +
> > +struct xrt_root_ioctl_get_id {
> > +	unsigned short  xpigi_vendor_id;
> > +	unsigned short  xpigi_device_id;
> > +	unsigned short  xpigi_sub_vendor_id;
> > +	unsigned short  xpigi_sub_device_id;
> > +};
> > +
> > +struct xrt_root_ioctl_hwmon {
> > +	bool xpih_register;
> > +	const char *xpih_name;
> > +	void *xpih_drvdata;
> > +	const struct attribute_group **xpih_groups;
> > +	struct device *xpih_hwmon_dev;
> > +};
> > +
> > +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
> This function pointer type is important, please add a comment about its use and expected parameters
> > +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
> > +
> > +/*
> > + * Defines physical function (MPF / UPF) specific operations
> > + * needed in common root driver.
> > + */
> > +struct xroot_pf_cb {
> > +	void (*xpc_hot_reset)(struct pci_dev *pdev);
> This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed ?
> > +};
> > +
> > +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
> > +void xroot_remove(void *root);
> > +bool xroot_wait_for_bringup(void *root);
> > +int xroot_add_vsec_node(void *root, char *dtb);
> > +int xroot_create_group(void *xr, char *dtb);
> > +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
> > +void xroot_broadcast(void *root, enum xrt_events evt);
> > +
> > +#endif	/* _XRT_ROOT_H_ */
> > diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
> > new file mode 100644
> > index 000000000000..583a37c9d30c
> > --- /dev/null
> > +++ b/drivers/fpga/xrt/mgmt/root.c
> > @@ -0,0 +1,342 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Xilinx Alveo Management Function Driver
> > + *
> > + * Copyright (C) 2020-2021 Xilinx, Inc.
> > + *
> > + * Authors:
> > + *	Cheng Zhen <maxz@xilinx.com>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/pci.h>
> > +#include <linux/aer.h>
> > +#include <linux/vmalloc.h>
> > +#include <linux/delay.h>
> > +
> > +#include "xroot.h"
> > +#include "main-impl.h"
> > +#include "metadata.h"
> > +
> > +#define XMGMT_MODULE_NAME	"xmgmt"
> The xrt modules would be more easily identified with a 'xrt' prefix instead of 'x'
> > +#define XMGMT_DRIVER_VERSION	"4.0.0"
> > +
> > +#define XMGMT_PDEV(xm)		((xm)->pdev)
> > +#define XMGMT_DEV(xm)		(&(XMGMT_PDEV(xm)->dev))
> > +#define xmgmt_err(xm, fmt, args...)	\
> > +	dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_warn(xm, fmt, args...)	\
> > +	dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_info(xm, fmt, args...)	\
> > +	dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_dbg(xm, fmt, args...)	\
> > +	dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define XMGMT_DEV_ID(_pcidev)			\
> > +	({ typeof(_pcidev) (pcidev) = (_pcidev);	\
> > +	((pci_domain_nr((pcidev)->bus) << 16) |	\
> > +	PCI_DEVID((pcidev)->bus->number, 0)); })
> > +
> > +static struct class *xmgmt_class;
> > +static const struct pci_device_id xmgmt_pci_ids[] = {
> > +	{ PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
> > +	{ PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
> 
> demagic this table, look at dfl-pci for how to use existing #define for the vendor and create a new on for the device.  If there are vf's add them at the same time.
> 
> What is a golden image ?
> 
> > +	{ 0, }
> > +};
> > +
> > +struct xmgmt {
> > +	struct pci_dev *pdev;
> > +	void *root;
> > +
> > +	bool ready;
> > +};
> > +
> > +static int xmgmt_config_pci(struct xmgmt *xm)
> > +{
> > +	struct pci_dev *pdev = XMGMT_PDEV(xm);
> > +	int rc;
> > +
> > +	rc = pcim_enable_device(pdev);
> > +	if (rc < 0) {
> > +		xmgmt_err(xm, "failed to enable device: %d", rc);
> > +		return rc;
> > +	}
> > +
> > +	rc = pci_enable_pcie_error_reporting(pdev);
> > +	if (rc)
> > +		xmgmt_warn(xm, "failed to enable AER: %d", rc);
> > +
> > +	pci_set_master(pdev);
> > +
> > +	rc = pcie_get_readrq(pdev);
> Review this call, it does not go negative
> > +	if (rc < 0) {
> > +		xmgmt_err(xm, "failed to read mrrs %d", rc);
> > +		return rc;
> > +	}
> this is a quirk, add a comment.
> > +	if (rc > 512) {
> > +		rc = pcie_set_readrq(pdev, 512);
> > +		if (rc) {
> > +			xmgmt_err(xm, "failed to force mrrs %d", rc);
> similar calls do not fail here.
> > +			return rc;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
> > +{
> > +	struct xmgmt *xm = data;
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> > +		pci_cfg_access_lock(pdev);
> > +		pci_save_state(pdev);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
> > +{
> > +	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
> 
> This is a bus call, not a device call.
> 
> Can this be changed into something like what hot reset does ?
> 
> > +}
> > +
> > +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
> > +{
> > +	struct xmgmt *xm = data;
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> > +		pci_restore_state(pdev);
> > +		pci_cfg_access_unlock(pdev);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
> > +{
> > +	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
> > +}
> > +
> > +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
> > +{
> > +	struct xmgmt *xm = pci_get_drvdata(pdev);
> > +	struct pci_bus *bus;
> > +	u8 pci_bctl;
> > +	u16 pci_cmd, devctl;
> > +	int i, ret;
> > +
> > +	xmgmt_info(xm, "hot reset start");
> > +
> > +	xmgmt_pci_save_config_all(xm);
> > +
> > +	pci_disable_device(pdev);
> > +
> > +	bus = pdev->bus;
> > +
> > +	/*
> > +	 * When flipping the SBR bit, device can fall off the bus. This is
> > +	 * usually no problem at all so long as drivers are working properly
> > +	 * after SBR. However, some systems complain bitterly when the device
> > +	 * falls off the bus.
> > +	 * The quick solution is to temporarily disable the SERR reporting of
> > +	 * switch port during SBR.
> > +	 */
> > +
> > +	pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
> > +	pci_write_config_word(bus->self, PCI_COMMAND,
> > +			      (pci_cmd & ~PCI_COMMAND_SERR));
> > +	pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
> > +	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
> > +				   (devctl & ~PCI_EXP_DEVCTL_FERE));
> > +	pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
> > +	pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
> > +	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> 
> how the pci config values are set and cleared should be consistent.
> 
> this call should be
> 
> pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )
> 
> and the next &= avoided
> 
> > +
> > +	msleep(100);
> > +	pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
> > +	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> > +	ssleep(1);
> > +
> > +	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
> > +	pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
> > +
> > +	ret = pci_enable_device(pdev);
> > +	if (ret)
> > +		xmgmt_err(xm, "failed to enable device, ret %d", ret);
> > +
> > +	for (i = 0; i < 300; i++) {
> > +		pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
> > +		if (pci_cmd != 0xffff)
> what happens with i == 300 and pci_cmd is still 0xffff ?
> > +			break;
> > +		msleep(20);
> > +	}
> > +
> > +	xmgmt_info(xm, "waiting for %d ms", i * 20);
> > +	xmgmt_pci_restore_config_all(xm);
> > +	xmgmt_config_pci(xm);
> > +}
> > +
> > +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
> > +{
> > +	char *dtb = NULL;
> > +	int ret;
> > +
> > +	ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
> > +	if (ret) {
> > +		xmgmt_err(xm, "create metadata failed, ret %d", ret);
> > +		goto failed;
> > +	}
> > +
> > +	ret = xroot_add_vsec_node(xm->root, dtb);
> > +	if (ret == -ENOENT) {
> > +		/*
> > +		 * We may be dealing with a MFG board.
> > +		 * Try vsec-golden which will bring up all hard-coded leaves
> > +		 * at hard-coded offsets.
> > +		 */
> > +		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);
> 
> Manufacturing diagnostics ?
> 
> Tom
> 
> > +	} else if (ret == 0) {
> > +		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
> > +	}
> > +	if (ret)
> > +		goto failed;
> > +
> > +	*root_dtb = dtb;
> > +	return 0;
> > +
> > +failed:
> > +	vfree(dtb);
> > +	return ret;
> > +}
> > +
> > +static ssize_t ready_show(struct device *dev,
> > +			  struct device_attribute *da,
> > +			  char *buf)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	struct xmgmt *xm = pci_get_drvdata(pdev);
> > +
> > +	return sprintf(buf, "%d\n", xm->ready);
> > +}
> > +static DEVICE_ATTR_RO(ready);
> > +
> > +static struct attribute *xmgmt_root_attrs[] = {
> > +	&dev_attr_ready.attr,
> > +	NULL
> > +};
> > +
> > +static struct attribute_group xmgmt_root_attr_group = {
> > +	.attrs = xmgmt_root_attrs,
> > +};
> > +
> > +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
> > +	.xpc_hot_reset = xmgmt_root_hot_reset,
> > +};
> > +
> > +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > +{
> > +	int ret;
> > +	struct device *dev = &pdev->dev;
> > +	struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
> > +	char *dtb = NULL;
> > +
> > +	if (!xm)
> > +		return -ENOMEM;
> > +	xm->pdev = pdev;
> > +	pci_set_drvdata(pdev, xm);
> > +
> > +	ret = xmgmt_config_pci(xm);
> > +	if (ret)
> > +		goto failed;
> > +
> > +	ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
> > +	if (ret)
> > +		goto failed;
> > +
> > +	ret = xmgmt_create_root_metadata(xm, &dtb);
> > +	if (ret)
> > +		goto failed_metadata;
> > +
> > +	ret = xroot_create_group(xm->root, dtb);
> > +	vfree(dtb);
> > +	if (ret)
> > +		xmgmt_err(xm, "failed to create root group: %d", ret);
> > +
> > +	if (!xroot_wait_for_bringup(xm->root))
> > +		xmgmt_err(xm, "failed to bringup all groups");
> > +	else
> > +		xm->ready = true;
> > +
> > +	ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> > +	if (ret) {
> > +		/* Warning instead of failing the probe. */
> > +		xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
> > +	}
> > +
> > +	xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
> > +	xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
> > +	return 0;
> > +
> > +failed_metadata:
> > +	(void)xroot_remove(xm->root);
> > +failed:
> > +	pci_set_drvdata(pdev, NULL);
> > +	return ret;
> > +}
> > +
> > +static void xmgmt_remove(struct pci_dev *pdev)
> > +{
> > +	struct xmgmt *xm = pci_get_drvdata(pdev);
> > +
> > +	xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
> > +	sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> > +	(void)xroot_remove(xm->root);
> > +	pci_disable_pcie_error_reporting(xm->pdev);
> > +	xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
> > +}
> > +
> > +static struct pci_driver xmgmt_driver = {
> > +	.name = XMGMT_MODULE_NAME,
> > +	.id_table = xmgmt_pci_ids,
> > +	.probe = xmgmt_probe,
> > +	.remove = xmgmt_remove,
> > +};
> > +
> > +static int __init xmgmt_init(void)
> > +{
> > +	int res = 0;
> > +
> > +	res = xmgmt_main_register_leaf();
> > +	if (res)
> > +		return res;
> > +
> > +	xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
> > +	if (IS_ERR(xmgmt_class))
> > +		return PTR_ERR(xmgmt_class);
> > +
> > +	res = pci_register_driver(&xmgmt_driver);
> > +	if (res) {
> > +		class_destroy(xmgmt_class);
> > +		return res;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static __exit void xmgmt_exit(void)
> > +{
> > +	pci_unregister_driver(&xmgmt_driver);
> > +	class_destroy(xmgmt_class);
> > +	xmgmt_main_unregister_leaf();
> > +}
> > +
> > +module_init(xmgmt_init);
> > +module_exit(xmgmt_exit);
> > +
> > +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
> > +MODULE_VERSION(XMGMT_DRIVER_VERSION);
> > +MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
> > +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
> > +MODULE_LICENSE("GPL v2");
> 
- Moritz
Max Zhen March 16, 2021, 8:29 p.m. UTC | #3
Hi Tom,


On 2/26/21 7:01 AM, Tom Rix wrote:
> CAUTION: This message has originated from an External Source. Please use proper judgment and caution when opening attachments, clicking links, or responding to this email.
>
>
> A question i do not know the answer to.
>
> Seems like 'golden' is linked to a manufacturing (diagnostics?) image.
>
> If the public will never see it, should handling it here be done ?
>
> Moritz, do you know ?


Golden image is preloaded on the device when it is shipped to customer. 
Then, customer can load other shells (from Xilinx or some other vendor). 
If something goes wrong with the shell, customer can always go back to 
golden and start over again. So, golden image is going to be used in 
public, not just internally by Xilinx.


>
>
> On 2/17/21 10:40 PM, Lizhi Hou wrote:
>> The PCIE device driver which attaches to management function on Alveo
> to the management


Sure.


>> devices. It instantiates one or more partition drivers which in turn
> more fpga partition / group ?


Group driver.


>> instantiate platform drivers. The instantiation of partition and platform
>> drivers is completely data driven.
> data driven ? everything is data driven.  do you mean dtb driven ?


Data driven means not hard-coded. Here data means meta data which is 
presented in device tree format, dtb.


>> Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
>> Signed-off-by: Max Zhen <max.zhen@xilinx.com>
>> Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
>> ---
>>   drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
>>   drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
>>   2 files changed, 456 insertions(+)
>>   create mode 100644 drivers/fpga/xrt/include/xroot.h
>>   create mode 100644 drivers/fpga/xrt/mgmt/root.c
>>
>> diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
>> new file mode 100644
>> index 000000000000..752e10daa85e
>> --- /dev/null
>> +++ b/drivers/fpga/xrt/include/xroot.h
>> @@ -0,0 +1,114 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Header file for Xilinx Runtime (XRT) driver
>> + *
>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>> + *
>> + * Authors:
>> + *   Cheng Zhen <maxz@xilinx.com>
>> + */
>> +
>> +#ifndef _XRT_ROOT_H_
>> +#define _XRT_ROOT_H_
>> +
>> +#include <linux/pci.h>
>> +#include "subdev_id.h"
>> +#include "events.h"
>> +
>> +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
>> +     struct platform_device *, void *);
>> +#define XRT_SUBDEV_MATCH_PREV        ((xrt_subdev_match_t)-1)
>> +#define XRT_SUBDEV_MATCH_NEXT        ((xrt_subdev_match_t)-2)
>> +
>> +/*
>> + * Root IOCTL calls.
>> + */
>> +enum xrt_root_ioctl_cmd {
>> +     /* Leaf actions. */
>> +     XRT_ROOT_GET_LEAF = 0,
>> +     XRT_ROOT_PUT_LEAF,
>> +     XRT_ROOT_GET_LEAF_HOLDERS,
>> +
>> +     /* Group actions. */
>> +     XRT_ROOT_CREATE_GROUP,
>> +     XRT_ROOT_REMOVE_GROUP,
>> +     XRT_ROOT_LOOKUP_GROUP,
>> +     XRT_ROOT_WAIT_GROUP_BRINGUP,
>> +
>> +     /* Event actions. */
>> +     XRT_ROOT_EVENT,
> should this be XRT_ROOT_EVENT_SYNC ?


Sure.


>> +     XRT_ROOT_EVENT_ASYNC,
>> +
>> +     /* Device info. */
>> +     XRT_ROOT_GET_RESOURCE,
>> +     XRT_ROOT_GET_ID,
>> +
>> +     /* Misc. */
>> +     XRT_ROOT_HOT_RESET,
>> +     XRT_ROOT_HWMON,
>> +};
>> +
>> +struct xrt_root_ioctl_get_leaf {
>> +     struct platform_device *xpigl_pdev; /* caller's pdev */
> xpigl_ ? unneeded suffix in element names


It's needed since the it might be included and used in > 1 .c files. I'd 
like to keep it's name unique.


>> +     xrt_subdev_match_t xpigl_match_cb;
>> +     void *xpigl_match_arg;
>> +     struct platform_device *xpigl_leaf; /* target leaf pdev */
>> +};
>> +
>> +struct xrt_root_ioctl_put_leaf {
>> +     struct platform_device *xpipl_pdev; /* caller's pdev */
>> +     struct platform_device *xpipl_leaf; /* target's pdev */
> caller_pdev;
>
> target_pdev;


Sure.


>
>> +};
>> +
>> +struct xrt_root_ioctl_lookup_group {
>> +     struct platform_device *xpilp_pdev; /* caller's pdev */
>> +     xrt_subdev_match_t xpilp_match_cb;
>> +     void *xpilp_match_arg;
>> +     int xpilp_grp_inst;
>> +};
>> +
>> +struct xrt_root_ioctl_get_holders {
>> +     struct platform_device *xpigh_pdev; /* caller's pdev */
>> +     char *xpigh_holder_buf;
>> +     size_t xpigh_holder_buf_len;
>> +};
>> +
>> +struct xrt_root_ioctl_get_res {
>> +     struct resource *xpigr_res;
>> +};
>> +
>> +struct xrt_root_ioctl_get_id {
>> +     unsigned short  xpigi_vendor_id;
>> +     unsigned short  xpigi_device_id;
>> +     unsigned short  xpigi_sub_vendor_id;
>> +     unsigned short  xpigi_sub_device_id;
>> +};
>> +
>> +struct xrt_root_ioctl_hwmon {
>> +     bool xpih_register;
>> +     const char *xpih_name;
>> +     void *xpih_drvdata;
>> +     const struct attribute_group **xpih_groups;
>> +     struct device *xpih_hwmon_dev;
>> +};
>> +
>> +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
> This function pointer type is important, please add a comment about its use and expected parameters


Added.


>> +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
>> +
>> +/*
>> + * Defines physical function (MPF / UPF) specific operations
>> + * needed in common root driver.
>> + */
>> +struct xroot_pf_cb {
>> +     void (*xpc_hot_reset)(struct pci_dev *pdev);
> This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed ?


As comment says, hot reset is implemented differently in MPF and UPF 
driver. So, we need this callback in this common code. Note that we have 
not added UPF code in our initial patch yet. It will be added in the future.


>> +};
>> +
>> +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
>> +void xroot_remove(void *root);
>> +bool xroot_wait_for_bringup(void *root);
>> +int xroot_add_vsec_node(void *root, char *dtb);
>> +int xroot_create_group(void *xr, char *dtb);
>> +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
>> +void xroot_broadcast(void *root, enum xrt_events evt);
>> +
>> +#endif       /* _XRT_ROOT_H_ */
>> diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
>> new file mode 100644
>> index 000000000000..583a37c9d30c
>> --- /dev/null
>> +++ b/drivers/fpga/xrt/mgmt/root.c
>> @@ -0,0 +1,342 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Xilinx Alveo Management Function Driver
>> + *
>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>> + *
>> + * Authors:
>> + *   Cheng Zhen <maxz@xilinx.com>
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/pci.h>
>> +#include <linux/aer.h>
>> +#include <linux/vmalloc.h>
>> +#include <linux/delay.h>
>> +
>> +#include "xroot.h"
>> +#include "main-impl.h"
>> +#include "metadata.h"
>> +
>> +#define XMGMT_MODULE_NAME    "xmgmt"
> The xrt modules would be more easily identified with a 'xrt' prefix instead of 'x'


We will change the module name to xrt-mgmt.


>> +#define XMGMT_DRIVER_VERSION "4.0.0"
>> +
>> +#define XMGMT_PDEV(xm)               ((xm)->pdev)
>> +#define XMGMT_DEV(xm)                (&(XMGMT_PDEV(xm)->dev))
>> +#define xmgmt_err(xm, fmt, args...)  \
>> +     dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>> +#define xmgmt_warn(xm, fmt, args...) \
>> +     dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>> +#define xmgmt_info(xm, fmt, args...) \
>> +     dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>> +#define xmgmt_dbg(xm, fmt, args...)  \
>> +     dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>> +#define XMGMT_DEV_ID(_pcidev)                        \
>> +     ({ typeof(_pcidev) (pcidev) = (_pcidev);        \
>> +     ((pci_domain_nr((pcidev)->bus) << 16) | \
>> +     PCI_DEVID((pcidev)->bus->number, 0)); })
>> +
>> +static struct class *xmgmt_class;
>> +static const struct pci_device_id xmgmt_pci_ids[] = {
>> +     { PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
>> +     { PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
> demagic this table, look at dfl-pci for how to use existing #define for the vendor and create a new on for the device.  If there are vf's add them at the same time.
>
> What is a golden image ?


Fixed. Please see my comments above for golden image.


>
>> +     { 0, }
>> +};
>> +
>> +struct xmgmt {
>> +     struct pci_dev *pdev;
>> +     void *root;
>> +
>> +     bool ready;
>> +};
>> +
>> +static int xmgmt_config_pci(struct xmgmt *xm)
>> +{
>> +     struct pci_dev *pdev = XMGMT_PDEV(xm);
>> +     int rc;
>> +
>> +     rc = pcim_enable_device(pdev);
>> +     if (rc < 0) {
>> +             xmgmt_err(xm, "failed to enable device: %d", rc);
>> +             return rc;
>> +     }
>> +
>> +     rc = pci_enable_pcie_error_reporting(pdev);
>> +     if (rc)
>> +             xmgmt_warn(xm, "failed to enable AER: %d", rc);
>> +
>> +     pci_set_master(pdev);
>> +
>> +     rc = pcie_get_readrq(pdev);
> Review this call, it does not go negative


I'll remove the check against negative value.


>> +     if (rc < 0) {
>> +             xmgmt_err(xm, "failed to read mrrs %d", rc);
>> +             return rc;
>> +     }
> this is a quirk, add a comment.


Will remove.


>> +     if (rc > 512) {
>> +             rc = pcie_set_readrq(pdev, 512);
>> +             if (rc) {
>> +                     xmgmt_err(xm, "failed to force mrrs %d", rc);
> similar calls do not fail here.


Will remove.


>> +                     return rc;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
>> +{
>> +     struct xmgmt *xm = data;
>> +     struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>> +             pci_cfg_access_lock(pdev);
>> +             pci_save_state(pdev);
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
>> +{
>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
> This is a bus call, not a device call.
>
> Can this be changed into something like what hot reset does ?


We are working on both mgmt pf and user pf here, so sort of like a bus. 
But, it might be better to refactor this when we have our own bus type 
implementation. We do not need to make PCIE bus call. We will fix this 
in V5 patch set where we'll implement our own bus type.


>
>> +}
>> +
>> +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
>> +{
>> +     struct xmgmt *xm = data;
>> +     struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>> +             pci_restore_state(pdev);
>> +             pci_cfg_access_unlock(pdev);
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
>> +{
>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
>> +}
>> +
>> +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
>> +{
>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>> +     struct pci_bus *bus;
>> +     u8 pci_bctl;
>> +     u16 pci_cmd, devctl;
>> +     int i, ret;
>> +
>> +     xmgmt_info(xm, "hot reset start");
>> +
>> +     xmgmt_pci_save_config_all(xm);
>> +
>> +     pci_disable_device(pdev);
>> +
>> +     bus = pdev->bus;
>> +
>> +     /*
>> +      * When flipping the SBR bit, device can fall off the bus. This is
>> +      * usually no problem at all so long as drivers are working properly
>> +      * after SBR. However, some systems complain bitterly when the device
>> +      * falls off the bus.
>> +      * The quick solution is to temporarily disable the SERR reporting of
>> +      * switch port during SBR.
>> +      */
>> +
>> +     pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
>> +     pci_write_config_word(bus->self, PCI_COMMAND,
>> +                           (pci_cmd & ~PCI_COMMAND_SERR));
>> +     pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
>> +                                (devctl & ~PCI_EXP_DEVCTL_FERE));
>> +     pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
>> +     pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> how the pci config values are set and cleared should be consistent.
>
> this call should be
>
> pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )
>
> and the next &= avoided


Sure.


>
>> +
>> +     msleep(100);
>> +     pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
>> +     ssleep(1);
>> +
>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
>> +     pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
>> +
>> +     ret = pci_enable_device(pdev);
>> +     if (ret)
>> +             xmgmt_err(xm, "failed to enable device, ret %d", ret);
>> +
>> +     for (i = 0; i < 300; i++) {
>> +             pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
>> +             if (pci_cmd != 0xffff)
> what happens with i == 300 and pci_cmd is still 0xffff ?


Something wrong happens to the device since it's not coming back after 
the reset. In this case, the device cannot be used and the only way to 
recover is to power cycle the system so that the shell can be reloaded 
from the flash on the device.


>> +                     break;
>> +             msleep(20);
>> +     }
>> +
>> +     xmgmt_info(xm, "waiting for %d ms", i * 20);
>> +     xmgmt_pci_restore_config_all(xm);
>> +     xmgmt_config_pci(xm);
>> +}
>> +
>> +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
>> +{
>> +     char *dtb = NULL;
>> +     int ret;
>> +
>> +     ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
>> +     if (ret) {
>> +             xmgmt_err(xm, "create metadata failed, ret %d", ret);
>> +             goto failed;
>> +     }
>> +
>> +     ret = xroot_add_vsec_node(xm->root, dtb);
>> +     if (ret == -ENOENT) {
>> +             /*
>> +              * We may be dealing with a MFG board.
>> +              * Try vsec-golden which will bring up all hard-coded leaves
>> +              * at hard-coded offsets.
>> +              */
>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);
> Manufacturing diagnostics ?


This is for golden image support. Please see my comments above.


Thanks,

Max

>
> Tom
>
>> +     } else if (ret == 0) {
>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
>> +     }
>> +     if (ret)
>> +             goto failed;
>> +
>> +     *root_dtb = dtb;
>> +     return 0;
>> +
>> +failed:
>> +     vfree(dtb);
>> +     return ret;
>> +}
>> +
>> +static ssize_t ready_show(struct device *dev,
>> +                       struct device_attribute *da,
>> +                       char *buf)
>> +{
>> +     struct pci_dev *pdev = to_pci_dev(dev);
>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>> +
>> +     return sprintf(buf, "%d\n", xm->ready);
>> +}
>> +static DEVICE_ATTR_RO(ready);
>> +
>> +static struct attribute *xmgmt_root_attrs[] = {
>> +     &dev_attr_ready.attr,
>> +     NULL
>> +};
>> +
>> +static struct attribute_group xmgmt_root_attr_group = {
>> +     .attrs = xmgmt_root_attrs,
>> +};
>> +
>> +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
>> +     .xpc_hot_reset = xmgmt_root_hot_reset,
>> +};
>> +
>> +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>> +{
>> +     int ret;
>> +     struct device *dev = &pdev->dev;
>> +     struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
>> +     char *dtb = NULL;
>> +
>> +     if (!xm)
>> +             return -ENOMEM;
>> +     xm->pdev = pdev;
>> +     pci_set_drvdata(pdev, xm);
>> +
>> +     ret = xmgmt_config_pci(xm);
>> +     if (ret)
>> +             goto failed;
>> +
>> +     ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
>> +     if (ret)
>> +             goto failed;
>> +
>> +     ret = xmgmt_create_root_metadata(xm, &dtb);
>> +     if (ret)
>> +             goto failed_metadata;
>> +
>> +     ret = xroot_create_group(xm->root, dtb);
>> +     vfree(dtb);
>> +     if (ret)
>> +             xmgmt_err(xm, "failed to create root group: %d", ret);
>> +
>> +     if (!xroot_wait_for_bringup(xm->root))
>> +             xmgmt_err(xm, "failed to bringup all groups");
>> +     else
>> +             xm->ready = true;
>> +
>> +     ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>> +     if (ret) {
>> +             /* Warning instead of failing the probe. */
>> +             xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
>> +     }
>> +
>> +     xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
>> +     xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
>> +     return 0;
>> +
>> +failed_metadata:
>> +     (void)xroot_remove(xm->root);
>> +failed:
>> +     pci_set_drvdata(pdev, NULL);
>> +     return ret;
>> +}
>> +
>> +static void xmgmt_remove(struct pci_dev *pdev)
>> +{
>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>> +
>> +     xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
>> +     sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>> +     (void)xroot_remove(xm->root);
>> +     pci_disable_pcie_error_reporting(xm->pdev);
>> +     xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
>> +}
>> +
>> +static struct pci_driver xmgmt_driver = {
>> +     .name = XMGMT_MODULE_NAME,
>> +     .id_table = xmgmt_pci_ids,
>> +     .probe = xmgmt_probe,
>> +     .remove = xmgmt_remove,
>> +};
>> +
>> +static int __init xmgmt_init(void)
>> +{
>> +     int res = 0;
>> +
>> +     res = xmgmt_main_register_leaf();
>> +     if (res)
>> +             return res;
>> +
>> +     xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
>> +     if (IS_ERR(xmgmt_class))
>> +             return PTR_ERR(xmgmt_class);
>> +
>> +     res = pci_register_driver(&xmgmt_driver);
>> +     if (res) {
>> +             class_destroy(xmgmt_class);
>> +             return res;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static __exit void xmgmt_exit(void)
>> +{
>> +     pci_unregister_driver(&xmgmt_driver);
>> +     class_destroy(xmgmt_class);
>> +     xmgmt_main_unregister_leaf();
>> +}
>> +
>> +module_init(xmgmt_init);
>> +module_exit(xmgmt_exit);
>> +
>> +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
>> +MODULE_VERSION(XMGMT_DRIVER_VERSION);
>> +MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
>> +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
>> +MODULE_LICENSE("GPL v2");
Tom Rix March 17, 2021, 9:08 p.m. UTC | #4
On 3/16/21 1:29 PM, Max Zhen wrote:
> Hi Tom,
>
>
> On 2/26/21 7:01 AM, Tom Rix wrote:
>> CAUTION: This message has originated from an External Source. Please use proper judgment and caution when opening attachments, clicking links, or responding to this email.
>>
>>
>> A question i do not know the answer to.
>>
>> Seems like 'golden' is linked to a manufacturing (diagnostics?) image.
>>
>> If the public will never see it, should handling it here be done ?
>>
>> Moritz, do you know ?
>
>
> Golden image is preloaded on the device when it is shipped to customer. Then, customer can load other shells (from Xilinx or some other vendor). If something goes wrong with the shell, customer can always go back to golden and start over again. So, golden image is going to be used in public, not just internally by Xilinx.
>
>
Thanks for the explanation.


>>
>>
>> On 2/17/21 10:40 PM, Lizhi Hou wrote:
>>> The PCIE device driver which attaches to management function on Alveo
>> to the management
>
>
> Sure.
>
>
>>> devices. It instantiates one or more partition drivers which in turn
>> more fpga partition / group ?
>
>
> Group driver.
>
>
>>> instantiate platform drivers. The instantiation of partition and platform
>>> drivers is completely data driven.
>> data driven ? everything is data driven.  do you mean dtb driven ?
>
>
> Data driven means not hard-coded. Here data means meta data which is presented in device tree format, dtb.
>
>
>>> Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
>>> Signed-off-by: Max Zhen <max.zhen@xilinx.com>
>>> Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
>>> ---
>>>   drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
>>>   drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
>>>   2 files changed, 456 insertions(+)
>>>   create mode 100644 drivers/fpga/xrt/include/xroot.h
>>>   create mode 100644 drivers/fpga/xrt/mgmt/root.c
>>>
>>> diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
>>> new file mode 100644
>>> index 000000000000..752e10daa85e
>>> --- /dev/null
>>> +++ b/drivers/fpga/xrt/include/xroot.h
>>> @@ -0,0 +1,114 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * Header file for Xilinx Runtime (XRT) driver
>>> + *
>>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>>> + *
>>> + * Authors:
>>> + *   Cheng Zhen <maxz@xilinx.com>
>>> + */
>>> +
>>> +#ifndef _XRT_ROOT_H_
>>> +#define _XRT_ROOT_H_
>>> +
>>> +#include <linux/pci.h>
>>> +#include "subdev_id.h"
>>> +#include "events.h"
>>> +
>>> +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
>>> +     struct platform_device *, void *);
>>> +#define XRT_SUBDEV_MATCH_PREV        ((xrt_subdev_match_t)-1)
>>> +#define XRT_SUBDEV_MATCH_NEXT        ((xrt_subdev_match_t)-2)
>>> +
>>> +/*
>>> + * Root IOCTL calls.
>>> + */
>>> +enum xrt_root_ioctl_cmd {
>>> +     /* Leaf actions. */
>>> +     XRT_ROOT_GET_LEAF = 0,
>>> +     XRT_ROOT_PUT_LEAF,
>>> +     XRT_ROOT_GET_LEAF_HOLDERS,
>>> +
>>> +     /* Group actions. */
>>> +     XRT_ROOT_CREATE_GROUP,
>>> +     XRT_ROOT_REMOVE_GROUP,
>>> +     XRT_ROOT_LOOKUP_GROUP,
>>> +     XRT_ROOT_WAIT_GROUP_BRINGUP,
>>> +
>>> +     /* Event actions. */
>>> +     XRT_ROOT_EVENT,
>> should this be XRT_ROOT_EVENT_SYNC ?
>
>
> Sure.
>
>
>>> +     XRT_ROOT_EVENT_ASYNC,
>>> +
>>> +     /* Device info. */
>>> +     XRT_ROOT_GET_RESOURCE,
>>> +     XRT_ROOT_GET_ID,
>>> +
>>> +     /* Misc. */
>>> +     XRT_ROOT_HOT_RESET,
>>> +     XRT_ROOT_HWMON,
>>> +};
>>> +
>>> +struct xrt_root_ioctl_get_leaf {
>>> +     struct platform_device *xpigl_pdev; /* caller's pdev */
>> xpigl_ ? unneeded suffix in element names
>
>
> It's needed since the it might be included and used in > 1 .c files. I'd like to keep it's name unique.

This is an element name, the variable name sound be unique enough to make it clear.

This is not a critical issue, ok as-is.

>
>
>>> +     xrt_subdev_match_t xpigl_match_cb;
>>> +     void *xpigl_match_arg;
>>> +     struct platform_device *xpigl_leaf; /* target leaf pdev */
>>> +};
>>> +
>>> +struct xrt_root_ioctl_put_leaf {
>>> +     struct platform_device *xpipl_pdev; /* caller's pdev */
>>> +     struct platform_device *xpipl_leaf; /* target's pdev */
>> caller_pdev;
>>
>> target_pdev;
>
>
> Sure.
>
>
>>
>>> +};
>>> +
>>> +struct xrt_root_ioctl_lookup_group {
>>> +     struct platform_device *xpilp_pdev; /* caller's pdev */
>>> +     xrt_subdev_match_t xpilp_match_cb;
>>> +     void *xpilp_match_arg;
>>> +     int xpilp_grp_inst;
>>> +};
>>> +
>>> +struct xrt_root_ioctl_get_holders {
>>> +     struct platform_device *xpigh_pdev; /* caller's pdev */
>>> +     char *xpigh_holder_buf;
>>> +     size_t xpigh_holder_buf_len;
>>> +};
>>> +
>>> +struct xrt_root_ioctl_get_res {
>>> +     struct resource *xpigr_res;
>>> +};
>>> +
>>> +struct xrt_root_ioctl_get_id {
>>> +     unsigned short  xpigi_vendor_id;
>>> +     unsigned short  xpigi_device_id;
>>> +     unsigned short  xpigi_sub_vendor_id;
>>> +     unsigned short  xpigi_sub_device_id;
>>> +};
>>> +
>>> +struct xrt_root_ioctl_hwmon {
>>> +     bool xpih_register;
>>> +     const char *xpih_name;
>>> +     void *xpih_drvdata;
>>> +     const struct attribute_group **xpih_groups;
>>> +     struct device *xpih_hwmon_dev;
>>> +};
>>> +
>>> +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
>> This function pointer type is important, please add a comment about its use and expected parameters
>
>
> Added.
>
>
>>> +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
>>> +
>>> +/*
>>> + * Defines physical function (MPF / UPF) specific operations
>>> + * needed in common root driver.
>>> + */
>>> +struct xroot_pf_cb {
>>> +     void (*xpc_hot_reset)(struct pci_dev *pdev);
>> This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed ?
>
>
> As comment says, hot reset is implemented differently in MPF and UPF driver. So, we need this callback in this common code. Note that we have not added UPF code in our initial patch yet. It will be added in the future.
>
>
>>> +};
>>> +
>>> +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
>>> +void xroot_remove(void *root);
>>> +bool xroot_wait_for_bringup(void *root);
>>> +int xroot_add_vsec_node(void *root, char *dtb);
>>> +int xroot_create_group(void *xr, char *dtb);
>>> +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
>>> +void xroot_broadcast(void *root, enum xrt_events evt);
>>> +
>>> +#endif       /* _XRT_ROOT_H_ */
>>> diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
>>> new file mode 100644
>>> index 000000000000..583a37c9d30c
>>> --- /dev/null
>>> +++ b/drivers/fpga/xrt/mgmt/root.c
>>> @@ -0,0 +1,342 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Xilinx Alveo Management Function Driver
>>> + *
>>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>>> + *
>>> + * Authors:
>>> + *   Cheng Zhen <maxz@xilinx.com>
>>> + */
>>> +
>>> +#include <linux/module.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/aer.h>
>>> +#include <linux/vmalloc.h>
>>> +#include <linux/delay.h>
>>> +
>>> +#include "xroot.h"
>>> +#include "main-impl.h"
>>> +#include "metadata.h"
>>> +
>>> +#define XMGMT_MODULE_NAME    "xmgmt"
>> The xrt modules would be more easily identified with a 'xrt' prefix instead of 'x'
>
>
> We will change the module name to xrt-mgmt.
>
>
>>> +#define XMGMT_DRIVER_VERSION "4.0.0"
>>> +
>>> +#define XMGMT_PDEV(xm)               ((xm)->pdev)
>>> +#define XMGMT_DEV(xm)                (&(XMGMT_PDEV(xm)->dev))
>>> +#define xmgmt_err(xm, fmt, args...)  \
>>> +     dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>> +#define xmgmt_warn(xm, fmt, args...) \
>>> +     dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>> +#define xmgmt_info(xm, fmt, args...) \
>>> +     dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>> +#define xmgmt_dbg(xm, fmt, args...)  \
>>> +     dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>> +#define XMGMT_DEV_ID(_pcidev)                        \
>>> +     ({ typeof(_pcidev) (pcidev) = (_pcidev);        \
>>> +     ((pci_domain_nr((pcidev)->bus) << 16) | \
>>> +     PCI_DEVID((pcidev)->bus->number, 0)); })
>>> +
>>> +static struct class *xmgmt_class;
>>> +static const struct pci_device_id xmgmt_pci_ids[] = {
>>> +     { PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
>>> +     { PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
>> demagic this table, look at dfl-pci for how to use existing #define for the vendor and create a new on for the device.  If there are vf's add them at the same time.
>>
>> What is a golden image ?
>
>
> Fixed. Please see my comments above for golden image.
>
>
>>
>>> +     { 0, }
>>> +};
>>> +
>>> +struct xmgmt {
>>> +     struct pci_dev *pdev;
>>> +     void *root;
>>> +
>>> +     bool ready;
>>> +};
>>> +
>>> +static int xmgmt_config_pci(struct xmgmt *xm)
>>> +{
>>> +     struct pci_dev *pdev = XMGMT_PDEV(xm);
>>> +     int rc;
>>> +
>>> +     rc = pcim_enable_device(pdev);
>>> +     if (rc < 0) {
>>> +             xmgmt_err(xm, "failed to enable device: %d", rc);
>>> +             return rc;
>>> +     }
>>> +
>>> +     rc = pci_enable_pcie_error_reporting(pdev);
>>> +     if (rc)
>>> +             xmgmt_warn(xm, "failed to enable AER: %d", rc);
>>> +
>>> +     pci_set_master(pdev);
>>> +
>>> +     rc = pcie_get_readrq(pdev);
>> Review this call, it does not go negative
>
>
> I'll remove the check against negative value.
>
>
>>> +     if (rc < 0) {
>>> +             xmgmt_err(xm, "failed to read mrrs %d", rc);
>>> +             return rc;
>>> +     }
>> this is a quirk, add a comment.
>
>
> Will remove.
>
>
>>> +     if (rc > 512) {
>>> +             rc = pcie_set_readrq(pdev, 512);
>>> +             if (rc) {
>>> +                     xmgmt_err(xm, "failed to force mrrs %d", rc);
>> similar calls do not fail here.
>
>
> Will remove.
>
>
>>> +                     return rc;
>>> +             }
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
>>> +{
>>> +     struct xmgmt *xm = data;
>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>> +
>>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>>> +             pci_cfg_access_lock(pdev);
>>> +             pci_save_state(pdev);
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
>>> +{
>>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
>> This is a bus call, not a device call.
>>
>> Can this be changed into something like what hot reset does ?
>
>
> We are working on both mgmt pf and user pf here, so sort of like a bus. But, it might be better to refactor this when we have our own bus type implementation. We do not need to make PCIE bus call. We will fix this in V5 patch set where we'll implement our own bus type.
>
>
ok
>>
>>> +}
>>> +
>>> +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
>>> +{
>>> +     struct xmgmt *xm = data;
>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>> +
>>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>>> +             pci_restore_state(pdev);
>>> +             pci_cfg_access_unlock(pdev);
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
>>> +{
>>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
>>> +}
>>> +
>>> +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
>>> +{
>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>> +     struct pci_bus *bus;
>>> +     u8 pci_bctl;
>>> +     u16 pci_cmd, devctl;
>>> +     int i, ret;
>>> +
>>> +     xmgmt_info(xm, "hot reset start");
>>> +
>>> +     xmgmt_pci_save_config_all(xm);
>>> +
>>> +     pci_disable_device(pdev);
>>> +
>>> +     bus = pdev->bus;
>>> +
>>> +     /*
>>> +      * When flipping the SBR bit, device can fall off the bus. This is
>>> +      * usually no problem at all so long as drivers are working properly
>>> +      * after SBR. However, some systems complain bitterly when the device
>>> +      * falls off the bus.
>>> +      * The quick solution is to temporarily disable the SERR reporting of
>>> +      * switch port during SBR.
>>> +      */
>>> +
>>> +     pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
>>> +     pci_write_config_word(bus->self, PCI_COMMAND,
>>> +                           (pci_cmd & ~PCI_COMMAND_SERR));
>>> +     pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
>>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
>>> +                                (devctl & ~PCI_EXP_DEVCTL_FERE));
>>> +     pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
>>> +     pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
>>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
>> how the pci config values are set and cleared should be consistent.
>>
>> this call should be
>>
>> pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )
>>
>> and the next &= avoided
>
>
> Sure.
>
>
>>
>>> +
>>> +     msleep(100);
>>> +     pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
>>> +     ssleep(1);
>>> +
>>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
>>> +     pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
>>> +
>>> +     ret = pci_enable_device(pdev);
>>> +     if (ret)
>>> +             xmgmt_err(xm, "failed to enable device, ret %d", ret);
>>> +
>>> +     for (i = 0; i < 300; i++) {
>>> +             pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
>>> +             if (pci_cmd != 0xffff)
>> what happens with i == 300 and pci_cmd is still 0xffff ?
>
>
> Something wrong happens to the device since it's not coming back after the reset. In this case, the device cannot be used and the only way to recover is to power cycle the system so that the shell can be reloaded from the flash on the device.
>
>
so check and add a dev_crit() to let the user know.
>>> +                     break;
>>> +             msleep(20);
>>> +     }
>>> +
>>> +     xmgmt_info(xm, "waiting for %d ms", i * 20);
>>> +     xmgmt_pci_restore_config_all(xm);
>>> +     xmgmt_config_pci(xm);
>>> +}
>>> +
>>> +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
>>> +{
>>> +     char *dtb = NULL;
>>> +     int ret;
>>> +
>>> +     ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
>>> +     if (ret) {
>>> +             xmgmt_err(xm, "create metadata failed, ret %d", ret);
>>> +             goto failed;
>>> +     }
>>> +
>>> +     ret = xroot_add_vsec_node(xm->root, dtb);
>>> +     if (ret == -ENOENT) {
>>> +             /*
>>> +              * We may be dealing with a MFG board.
>>> +              * Try vsec-golden which will bring up all hard-coded leaves
>>> +              * at hard-coded offsets.
>>> +              */
>>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);
>> Manufacturing diagnostics ?
>
>
> This is for golden image support. Please see my comments above.

Ok, i got it :)

Thanks, looking forward next rev

Tom

>
>
> Thanks,
>
> Max
>
>>
>> Tom
>>
>>> +     } else if (ret == 0) {
>>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
>>> +     }
>>> +     if (ret)
>>> +             goto failed;
>>> +
>>> +     *root_dtb = dtb;
>>> +     return 0;
>>> +
>>> +failed:
>>> +     vfree(dtb);
>>> +     return ret;
>>> +}
>>> +
>>> +static ssize_t ready_show(struct device *dev,
>>> +                       struct device_attribute *da,
>>> +                       char *buf)
>>> +{
>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>> +
>>> +     return sprintf(buf, "%d\n", xm->ready);
>>> +}
>>> +static DEVICE_ATTR_RO(ready);
>>> +
>>> +static struct attribute *xmgmt_root_attrs[] = {
>>> +     &dev_attr_ready.attr,
>>> +     NULL
>>> +};
>>> +
>>> +static struct attribute_group xmgmt_root_attr_group = {
>>> +     .attrs = xmgmt_root_attrs,
>>> +};
>>> +
>>> +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
>>> +     .xpc_hot_reset = xmgmt_root_hot_reset,
>>> +};
>>> +
>>> +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>> +{
>>> +     int ret;
>>> +     struct device *dev = &pdev->dev;
>>> +     struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
>>> +     char *dtb = NULL;
>>> +
>>> +     if (!xm)
>>> +             return -ENOMEM;
>>> +     xm->pdev = pdev;
>>> +     pci_set_drvdata(pdev, xm);
>>> +
>>> +     ret = xmgmt_config_pci(xm);
>>> +     if (ret)
>>> +             goto failed;
>>> +
>>> +     ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
>>> +     if (ret)
>>> +             goto failed;
>>> +
>>> +     ret = xmgmt_create_root_metadata(xm, &dtb);
>>> +     if (ret)
>>> +             goto failed_metadata;
>>> +
>>> +     ret = xroot_create_group(xm->root, dtb);
>>> +     vfree(dtb);
>>> +     if (ret)
>>> +             xmgmt_err(xm, "failed to create root group: %d", ret);
>>> +
>>> +     if (!xroot_wait_for_bringup(xm->root))
>>> +             xmgmt_err(xm, "failed to bringup all groups");
>>> +     else
>>> +             xm->ready = true;
>>> +
>>> +     ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>>> +     if (ret) {
>>> +             /* Warning instead of failing the probe. */
>>> +             xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
>>> +     }
>>> +
>>> +     xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
>>> +     xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
>>> +     return 0;
>>> +
>>> +failed_metadata:
>>> +     (void)xroot_remove(xm->root);
>>> +failed:
>>> +     pci_set_drvdata(pdev, NULL);
>>> +     return ret;
>>> +}
>>> +
>>> +static void xmgmt_remove(struct pci_dev *pdev)
>>> +{
>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>> +
>>> +     xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
>>> +     sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>>> +     (void)xroot_remove(xm->root);
>>> +     pci_disable_pcie_error_reporting(xm->pdev);
>>> +     xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
>>> +}
>>> +
>>> +static struct pci_driver xmgmt_driver = {
>>> +     .name = XMGMT_MODULE_NAME,
>>> +     .id_table = xmgmt_pci_ids,
>>> +     .probe = xmgmt_probe,
>>> +     .remove = xmgmt_remove,
>>> +};
>>> +
>>> +static int __init xmgmt_init(void)
>>> +{
>>> +     int res = 0;
>>> +
>>> +     res = xmgmt_main_register_leaf();
>>> +     if (res)
>>> +             return res;
>>> +
>>> +     xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
>>> +     if (IS_ERR(xmgmt_class))
>>> +             return PTR_ERR(xmgmt_class);
>>> +
>>> +     res = pci_register_driver(&xmgmt_driver);
>>> +     if (res) {
>>> +             class_destroy(xmgmt_class);
>>> +             return res;
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static __exit void xmgmt_exit(void)
>>> +{
>>> +     pci_unregister_driver(&xmgmt_driver);
>>> +     class_destroy(xmgmt_class);
>>> +     xmgmt_main_unregister_leaf();
>>> +}
>>> +
>>> +module_init(xmgmt_init);
>>> +module_exit(xmgmt_exit);
>>> +
>>> +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
>>> +MODULE_VERSION(XMGMT_DRIVER_VERSION);
>>> +MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
>>> +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
>>> +MODULE_LICENSE("GPL v2");
>
Max Zhen March 18, 2021, 12:44 a.m. UTC | #5
On 3/17/21 2:08 PM, Tom Rix wrote:
>
> On 3/16/21 1:29 PM, Max Zhen wrote:
>> Hi Tom,
>>
>>
>> On 2/26/21 7:01 AM, Tom Rix wrote:
>>
>>
>>
>> A question i do not know the answer to.
>>
>> Seems like 'golden' is linked to a manufacturing (diagnostics?) image.
>>
>> If the public will never see it, should handling it here be done ?
>>
>> Moritz, do you know ?
>>
>> Golden image is preloaded on the device when it is shipped to customer. Then, customer can load other shells (from Xilinx or some other vendor). If something goes wrong with the shell, customer can always go back to golden and start over again. So, golden image is going to be used in public, not just internally by Xilinx.
>>
>>
> Thanks for the explanation.
>
>
>>>
>>> On 2/17/21 10:40 PM, Lizhi Hou wrote:
>>>> The PCIE device driver which attaches to management function on Alveo
>>> to the management
>>
>> Sure.
>>
>>
>>>> devices. It instantiates one or more partition drivers which in turn
>>> more fpga partition / group ?
>>
>> Group driver.
>>
>>
>>>> instantiate platform drivers. The instantiation of partition and platform
>>>> drivers is completely data driven.
>>> data driven ? everything is data driven.  do you mean dtb driven ?
>>
>> Data driven means not hard-coded. Here data means meta data which is presented in device tree format, dtb.
>>
>>
>>>> Signed-off-by: Sonal Santan <sonal.santan@xilinx.com>
>>>> Signed-off-by: Max Zhen <max.zhen@xilinx.com>
>>>> Signed-off-by: Lizhi Hou <lizhih@xilinx.com>
>>>> ---
>>>>    drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
>>>>    drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
>>>>    2 files changed, 456 insertions(+)
>>>>    create mode 100644 drivers/fpga/xrt/include/xroot.h
>>>>    create mode 100644 drivers/fpga/xrt/mgmt/root.c
>>>>
>>>> diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
>>>> new file mode 100644
>>>> index 000000000000..752e10daa85e
>>>> --- /dev/null
>>>> +++ b/drivers/fpga/xrt/include/xroot.h
>>>> @@ -0,0 +1,114 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * Header file for Xilinx Runtime (XRT) driver
>>>> + *
>>>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>>>> + *
>>>> + * Authors:
>>>> + *   Cheng Zhen <maxz@xilinx.com>
>>>> + */
>>>> +
>>>> +#ifndef _XRT_ROOT_H_
>>>> +#define _XRT_ROOT_H_
>>>> +
>>>> +#include <linux/pci.h>
>>>> +#include "subdev_id.h"
>>>> +#include "events.h"
>>>> +
>>>> +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
>>>> +     struct platform_device *, void *);
>>>> +#define XRT_SUBDEV_MATCH_PREV        ((xrt_subdev_match_t)-1)
>>>> +#define XRT_SUBDEV_MATCH_NEXT        ((xrt_subdev_match_t)-2)
>>>> +
>>>> +/*
>>>> + * Root IOCTL calls.
>>>> + */
>>>> +enum xrt_root_ioctl_cmd {
>>>> +     /* Leaf actions. */
>>>> +     XRT_ROOT_GET_LEAF = 0,
>>>> +     XRT_ROOT_PUT_LEAF,
>>>> +     XRT_ROOT_GET_LEAF_HOLDERS,
>>>> +
>>>> +     /* Group actions. */
>>>> +     XRT_ROOT_CREATE_GROUP,
>>>> +     XRT_ROOT_REMOVE_GROUP,
>>>> +     XRT_ROOT_LOOKUP_GROUP,
>>>> +     XRT_ROOT_WAIT_GROUP_BRINGUP,
>>>> +
>>>> +     /* Event actions. */
>>>> +     XRT_ROOT_EVENT,
>>> should this be XRT_ROOT_EVENT_SYNC ?
>>
>> Sure.
>>
>>
>>>> +     XRT_ROOT_EVENT_ASYNC,
>>>> +
>>>> +     /* Device info. */
>>>> +     XRT_ROOT_GET_RESOURCE,
>>>> +     XRT_ROOT_GET_ID,
>>>> +
>>>> +     /* Misc. */
>>>> +     XRT_ROOT_HOT_RESET,
>>>> +     XRT_ROOT_HWMON,
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_get_leaf {
>>>> +     struct platform_device *xpigl_pdev; /* caller's pdev */
>>> xpigl_ ? unneeded suffix in element names
>>
>> It's needed since the it might be included and used in > 1 .c files. I'd like to keep it's name unique.
> This is an element name, the variable name sound be unique enough to make it clear.
>
> This is not a critical issue, ok as-is.
>
>>
>>>> +     xrt_subdev_match_t xpigl_match_cb;
>>>> +     void *xpigl_match_arg;
>>>> +     struct platform_device *xpigl_leaf; /* target leaf pdev */
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_put_leaf {
>>>> +     struct platform_device *xpipl_pdev; /* caller's pdev */
>>>> +     struct platform_device *xpipl_leaf; /* target's pdev */
>>> caller_pdev;
>>>
>>> target_pdev;
>>
>> Sure.
>>
>>
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_lookup_group {
>>>> +     struct platform_device *xpilp_pdev; /* caller's pdev */
>>>> +     xrt_subdev_match_t xpilp_match_cb;
>>>> +     void *xpilp_match_arg;
>>>> +     int xpilp_grp_inst;
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_get_holders {
>>>> +     struct platform_device *xpigh_pdev; /* caller's pdev */
>>>> +     char *xpigh_holder_buf;
>>>> +     size_t xpigh_holder_buf_len;
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_get_res {
>>>> +     struct resource *xpigr_res;
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_get_id {
>>>> +     unsigned short  xpigi_vendor_id;
>>>> +     unsigned short  xpigi_device_id;
>>>> +     unsigned short  xpigi_sub_vendor_id;
>>>> +     unsigned short  xpigi_sub_device_id;
>>>> +};
>>>> +
>>>> +struct xrt_root_ioctl_hwmon {
>>>> +     bool xpih_register;
>>>> +     const char *xpih_name;
>>>> +     void *xpih_drvdata;
>>>> +     const struct attribute_group **xpih_groups;
>>>> +     struct device *xpih_hwmon_dev;
>>>> +};
>>>> +
>>>> +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
>>> This function pointer type is important, please add a comment about its use and expected parameters
>>
>> Added.
>>
>>
>>>> +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
>>>> +
>>>> +/*
>>>> + * Defines physical function (MPF / UPF) specific operations
>>>> + * needed in common root driver.
>>>> + */
>>>> +struct xroot_pf_cb {
>>>> +     void (*xpc_hot_reset)(struct pci_dev *pdev);
>>> This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed ?
>>
>> As comment says, hot reset is implemented differently in MPF and UPF driver. So, we need this callback in this common code. Note that we have not added UPF code in our initial patch yet. It will be added in the future.
>>
>>
>>>> +};
>>>> +
>>>> +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
>>>> +void xroot_remove(void *root);
>>>> +bool xroot_wait_for_bringup(void *root);
>>>> +int xroot_add_vsec_node(void *root, char *dtb);
>>>> +int xroot_create_group(void *xr, char *dtb);
>>>> +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
>>>> +void xroot_broadcast(void *root, enum xrt_events evt);
>>>> +
>>>> +#endif       /* _XRT_ROOT_H_ */
>>>> diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
>>>> new file mode 100644
>>>> index 000000000000..583a37c9d30c
>>>> --- /dev/null
>>>> +++ b/drivers/fpga/xrt/mgmt/root.c
>>>> @@ -0,0 +1,342 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Xilinx Alveo Management Function Driver
>>>> + *
>>>> + * Copyright (C) 2020-2021 Xilinx, Inc.
>>>> + *
>>>> + * Authors:
>>>> + *   Cheng Zhen <maxz@xilinx.com>
>>>> + */
>>>> +
>>>> +#include <linux/module.h>
>>>> +#include <linux/pci.h>
>>>> +#include <linux/aer.h>
>>>> +#include <linux/vmalloc.h>
>>>> +#include <linux/delay.h>
>>>> +
>>>> +#include "xroot.h"
>>>> +#include "main-impl.h"
>>>> +#include "metadata.h"
>>>> +
>>>> +#define XMGMT_MODULE_NAME    "xmgmt"
>>> The xrt modules would be more easily identified with a 'xrt' prefix instead of 'x'
>>
>> We will change the module name to xrt-mgmt.
>>
>>
>>>> +#define XMGMT_DRIVER_VERSION "4.0.0"
>>>> +
>>>> +#define XMGMT_PDEV(xm)               ((xm)->pdev)
>>>> +#define XMGMT_DEV(xm)                (&(XMGMT_PDEV(xm)->dev))
>>>> +#define xmgmt_err(xm, fmt, args...)  \
>>>> +     dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>>> +#define xmgmt_warn(xm, fmt, args...) \
>>>> +     dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>>> +#define xmgmt_info(xm, fmt, args...) \
>>>> +     dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>>> +#define xmgmt_dbg(xm, fmt, args...)  \
>>>> +     dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
>>>> +#define XMGMT_DEV_ID(_pcidev)                        \
>>>> +     ({ typeof(_pcidev) (pcidev) = (_pcidev);        \
>>>> +     ((pci_domain_nr((pcidev)->bus) << 16) | \
>>>> +     PCI_DEVID((pcidev)->bus->number, 0)); })
>>>> +
>>>> +static struct class *xmgmt_class;
>>>> +static const struct pci_device_id xmgmt_pci_ids[] = {
>>>> +     { PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
>>>> +     { PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
>>> demagic this table, look at dfl-pci for how to use existing #define for the vendor and create a new on for the device.  If there are vf's add them at the same time.
>>>
>>> What is a golden image ?
>>
>> Fixed. Please see my comments above for golden image.
>>
>>
>>>> +     { 0, }
>>>> +};
>>>> +
>>>> +struct xmgmt {
>>>> +     struct pci_dev *pdev;
>>>> +     void *root;
>>>> +
>>>> +     bool ready;
>>>> +};
>>>> +
>>>> +static int xmgmt_config_pci(struct xmgmt *xm)
>>>> +{
>>>> +     struct pci_dev *pdev = XMGMT_PDEV(xm);
>>>> +     int rc;
>>>> +
>>>> +     rc = pcim_enable_device(pdev);
>>>> +     if (rc < 0) {
>>>> +             xmgmt_err(xm, "failed to enable device: %d", rc);
>>>> +             return rc;
>>>> +     }
>>>> +
>>>> +     rc = pci_enable_pcie_error_reporting(pdev);
>>>> +     if (rc)
>>>> +             xmgmt_warn(xm, "failed to enable AER: %d", rc);
>>>> +
>>>> +     pci_set_master(pdev);
>>>> +
>>>> +     rc = pcie_get_readrq(pdev);
>>> Review this call, it does not go negative
>>
>> I'll remove the check against negative value.
>>
>>
>>>> +     if (rc < 0) {
>>>> +             xmgmt_err(xm, "failed to read mrrs %d", rc);
>>>> +             return rc;
>>>> +     }
>>> this is a quirk, add a comment.
>>
>> Will remove.
>>
>>
>>>> +     if (rc > 512) {
>>>> +             rc = pcie_set_readrq(pdev, 512);
>>>> +             if (rc) {
>>>> +                     xmgmt_err(xm, "failed to force mrrs %d", rc);
>>> similar calls do not fail here.
>>
>> Will remove.
>>
>>
>>>> +                     return rc;
>>>> +             }
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
>>>> +{
>>>> +     struct xmgmt *xm = data;
>>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>>> +
>>>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>>>> +             pci_cfg_access_lock(pdev);
>>>> +             pci_save_state(pdev);
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
>>>> +{
>>>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
>>> This is a bus call, not a device call.
>>>
>>> Can this be changed into something like what hot reset does ?
>>
>> We are working on both mgmt pf and user pf here, so sort of like a bus. But, it might be better to refactor this when we have our own bus type implementation. We do not need to make PCIE bus call. We will fix this in V5 patch set where we'll implement our own bus type.
>>
>>
> ok
>>>> +}
>>>> +
>>>> +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
>>>> +{
>>>> +     struct xmgmt *xm = data;
>>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>>> +
>>>> +     if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
>>>> +             pci_restore_state(pdev);
>>>> +             pci_cfg_access_unlock(pdev);
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
>>>> +{
>>>> +     bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
>>>> +}
>>>> +
>>>> +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
>>>> +{
>>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>>> +     struct pci_bus *bus;
>>>> +     u8 pci_bctl;
>>>> +     u16 pci_cmd, devctl;
>>>> +     int i, ret;
>>>> +
>>>> +     xmgmt_info(xm, "hot reset start");
>>>> +
>>>> +     xmgmt_pci_save_config_all(xm);
>>>> +
>>>> +     pci_disable_device(pdev);
>>>> +
>>>> +     bus = pdev->bus;
>>>> +
>>>> +     /*
>>>> +      * When flipping the SBR bit, device can fall off the bus. This is
>>>> +      * usually no problem at all so long as drivers are working properly
>>>> +      * after SBR. However, some systems complain bitterly when the device
>>>> +      * falls off the bus.
>>>> +      * The quick solution is to temporarily disable the SERR reporting of
>>>> +      * switch port during SBR.
>>>> +      */
>>>> +
>>>> +     pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
>>>> +     pci_write_config_word(bus->self, PCI_COMMAND,
>>>> +                           (pci_cmd & ~PCI_COMMAND_SERR));
>>>> +     pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
>>>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
>>>> +                                (devctl & ~PCI_EXP_DEVCTL_FERE));
>>>> +     pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
>>>> +     pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
>>>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
>>> how the pci config values are set and cleared should be consistent.
>>>
>>> this call should be
>>>
>>> pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )
>>>
>>> and the next &= avoided
>>
>> Sure.
>>
>>
>>>> +
>>>> +     msleep(100);
>>>> +     pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>>>> +     pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
>>>> +     ssleep(1);
>>>> +
>>>> +     pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
>>>> +     pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
>>>> +
>>>> +     ret = pci_enable_device(pdev);
>>>> +     if (ret)
>>>> +             xmgmt_err(xm, "failed to enable device, ret %d", ret);
>>>> +
>>>> +     for (i = 0; i < 300; i++) {
>>>> +             pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
>>>> +             if (pci_cmd != 0xffff)
>>> what happens with i == 300 and pci_cmd is still 0xffff ?
>>
>> Something wrong happens to the device since it's not coming back after the reset. In this case, the device cannot be used and the only way to recover is to power cycle the system so that the shell can be reloaded from the flash on the device.
>>
>>
> so check and add a dev_crit() to let the user know.


Sure. Will do.


Thanks,

Max

>>>> +                     break;
>>>> +             msleep(20);
>>>> +     }
>>>> +
>>>> +     xmgmt_info(xm, "waiting for %d ms", i * 20);
>>>> +     xmgmt_pci_restore_config_all(xm);
>>>> +     xmgmt_config_pci(xm);
>>>> +}
>>>> +
>>>> +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
>>>> +{
>>>> +     char *dtb = NULL;
>>>> +     int ret;
>>>> +
>>>> +     ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
>>>> +     if (ret) {
>>>> +             xmgmt_err(xm, "create metadata failed, ret %d", ret);
>>>> +             goto failed;
>>>> +     }
>>>> +
>>>> +     ret = xroot_add_vsec_node(xm->root, dtb);
>>>> +     if (ret == -ENOENT) {
>>>> +             /*
>>>> +              * We may be dealing with a MFG board.
>>>> +              * Try vsec-golden which will bring up all hard-coded leaves
>>>> +              * at hard-coded offsets.
>>>> +              */
>>>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);
>>> Manufacturing diagnostics ?
>>
>> This is for golden image support. Please see my comments above.
> Ok, i got it :)
>
> Thanks, looking forward next rev
>
> Tom
>
>>
>> Thanks,
>>
>> Max
>>
>>> Tom
>>>
>>>> +     } else if (ret == 0) {
>>>> +             ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
>>>> +     }
>>>> +     if (ret)
>>>> +             goto failed;
>>>> +
>>>> +     *root_dtb = dtb;
>>>> +     return 0;
>>>> +
>>>> +failed:
>>>> +     vfree(dtb);
>>>> +     return ret;
>>>> +}
>>>> +
>>>> +static ssize_t ready_show(struct device *dev,
>>>> +                       struct device_attribute *da,
>>>> +                       char *buf)
>>>> +{
>>>> +     struct pci_dev *pdev = to_pci_dev(dev);
>>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>>> +
>>>> +     return sprintf(buf, "%d\n", xm->ready);
>>>> +}
>>>> +static DEVICE_ATTR_RO(ready);
>>>> +
>>>> +static struct attribute *xmgmt_root_attrs[] = {
>>>> +     &dev_attr_ready.attr,
>>>> +     NULL
>>>> +};
>>>> +
>>>> +static struct attribute_group xmgmt_root_attr_group = {
>>>> +     .attrs = xmgmt_root_attrs,
>>>> +};
>>>> +
>>>> +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
>>>> +     .xpc_hot_reset = xmgmt_root_hot_reset,
>>>> +};
>>>> +
>>>> +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>> +{
>>>> +     int ret;
>>>> +     struct device *dev = &pdev->dev;
>>>> +     struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
>>>> +     char *dtb = NULL;
>>>> +
>>>> +     if (!xm)
>>>> +             return -ENOMEM;
>>>> +     xm->pdev = pdev;
>>>> +     pci_set_drvdata(pdev, xm);
>>>> +
>>>> +     ret = xmgmt_config_pci(xm);
>>>> +     if (ret)
>>>> +             goto failed;
>>>> +
>>>> +     ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
>>>> +     if (ret)
>>>> +             goto failed;
>>>> +
>>>> +     ret = xmgmt_create_root_metadata(xm, &dtb);
>>>> +     if (ret)
>>>> +             goto failed_metadata;
>>>> +
>>>> +     ret = xroot_create_group(xm->root, dtb);
>>>> +     vfree(dtb);
>>>> +     if (ret)
>>>> +             xmgmt_err(xm, "failed to create root group: %d", ret);
>>>> +
>>>> +     if (!xroot_wait_for_bringup(xm->root))
>>>> +             xmgmt_err(xm, "failed to bringup all groups");
>>>> +     else
>>>> +             xm->ready = true;
>>>> +
>>>> +     ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>>>> +     if (ret) {
>>>> +             /* Warning instead of failing the probe. */
>>>> +             xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
>>>> +     }
>>>> +
>>>> +     xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
>>>> +     xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
>>>> +     return 0;
>>>> +
>>>> +failed_metadata:
>>>> +     (void)xroot_remove(xm->root);
>>>> +failed:
>>>> +     pci_set_drvdata(pdev, NULL);
>>>> +     return ret;
>>>> +}
>>>> +
>>>> +static void xmgmt_remove(struct pci_dev *pdev)
>>>> +{
>>>> +     struct xmgmt *xm = pci_get_drvdata(pdev);
>>>> +
>>>> +     xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
>>>> +     sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
>>>> +     (void)xroot_remove(xm->root);
>>>> +     pci_disable_pcie_error_reporting(xm->pdev);
>>>> +     xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
>>>> +}
>>>> +
>>>> +static struct pci_driver xmgmt_driver = {
>>>> +     .name = XMGMT_MODULE_NAME,
>>>> +     .id_table = xmgmt_pci_ids,
>>>> +     .probe = xmgmt_probe,
>>>> +     .remove = xmgmt_remove,
>>>> +};
>>>> +
>>>> +static int __init xmgmt_init(void)
>>>> +{
>>>> +     int res = 0;
>>>> +
>>>> +     res = xmgmt_main_register_leaf();
>>>> +     if (res)
>>>> +             return res;
>>>> +
>>>> +     xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
>>>> +     if (IS_ERR(xmgmt_class))
>>>> +             return PTR_ERR(xmgmt_class);
>>>> +
>>>> +     res = pci_register_driver(&xmgmt_driver);
>>>> +     if (res) {
>>>> +             class_destroy(xmgmt_class);
>>>> +             return res;
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static __exit void xmgmt_exit(void)
>>>> +{
>>>> +     pci_unregister_driver(&xmgmt_driver);
>>>> +     class_destroy(xmgmt_class);
>>>> +     xmgmt_main_unregister_leaf();
>>>> +}
>>>> +
>>>> +module_init(xmgmt_init);
>>>> +module_exit(xmgmt_exit);
>>>> +
>>>> +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
>>>> +MODULE_VERSION(XMGMT_DRIVER_VERSION);
>>>> +MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
>>>> +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
>>>> +MODULE_LICENSE("GPL v2");
diff mbox series

Patch

diff --git a/drivers/fpga/xrt/include/xroot.h b/drivers/fpga/xrt/include/xroot.h
new file mode 100644
index 000000000000..752e10daa85e
--- /dev/null
+++ b/drivers/fpga/xrt/include/xroot.h
@@ -0,0 +1,114 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for Xilinx Runtime (XRT) driver
+ *
+ * Copyright (C) 2020-2021 Xilinx, Inc.
+ *
+ * Authors:
+ *	Cheng Zhen <maxz@xilinx.com>
+ */
+
+#ifndef _XRT_ROOT_H_
+#define _XRT_ROOT_H_
+
+#include <linux/pci.h>
+#include "subdev_id.h"
+#include "events.h"
+
+typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
+	struct platform_device *, void *);
+#define XRT_SUBDEV_MATCH_PREV	((xrt_subdev_match_t)-1)
+#define XRT_SUBDEV_MATCH_NEXT	((xrt_subdev_match_t)-2)
+
+/*
+ * Root IOCTL calls.
+ */
+enum xrt_root_ioctl_cmd {
+	/* Leaf actions. */
+	XRT_ROOT_GET_LEAF = 0,
+	XRT_ROOT_PUT_LEAF,
+	XRT_ROOT_GET_LEAF_HOLDERS,
+
+	/* Group actions. */
+	XRT_ROOT_CREATE_GROUP,
+	XRT_ROOT_REMOVE_GROUP,
+	XRT_ROOT_LOOKUP_GROUP,
+	XRT_ROOT_WAIT_GROUP_BRINGUP,
+
+	/* Event actions. */
+	XRT_ROOT_EVENT,
+	XRT_ROOT_EVENT_ASYNC,
+
+	/* Device info. */
+	XRT_ROOT_GET_RESOURCE,
+	XRT_ROOT_GET_ID,
+
+	/* Misc. */
+	XRT_ROOT_HOT_RESET,
+	XRT_ROOT_HWMON,
+};
+
+struct xrt_root_ioctl_get_leaf {
+	struct platform_device *xpigl_pdev; /* caller's pdev */
+	xrt_subdev_match_t xpigl_match_cb;
+	void *xpigl_match_arg;
+	struct platform_device *xpigl_leaf; /* target leaf pdev */
+};
+
+struct xrt_root_ioctl_put_leaf {
+	struct platform_device *xpipl_pdev; /* caller's pdev */
+	struct platform_device *xpipl_leaf; /* target's pdev */
+};
+
+struct xrt_root_ioctl_lookup_group {
+	struct platform_device *xpilp_pdev; /* caller's pdev */
+	xrt_subdev_match_t xpilp_match_cb;
+	void *xpilp_match_arg;
+	int xpilp_grp_inst;
+};
+
+struct xrt_root_ioctl_get_holders {
+	struct platform_device *xpigh_pdev; /* caller's pdev */
+	char *xpigh_holder_buf;
+	size_t xpigh_holder_buf_len;
+};
+
+struct xrt_root_ioctl_get_res {
+	struct resource *xpigr_res;
+};
+
+struct xrt_root_ioctl_get_id {
+	unsigned short  xpigi_vendor_id;
+	unsigned short  xpigi_device_id;
+	unsigned short  xpigi_sub_vendor_id;
+	unsigned short  xpigi_sub_device_id;
+};
+
+struct xrt_root_ioctl_hwmon {
+	bool xpih_register;
+	const char *xpih_name;
+	void *xpih_drvdata;
+	const struct attribute_group **xpih_groups;
+	struct device *xpih_hwmon_dev;
+};
+
+typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
+int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg);
+
+/*
+ * Defines physical function (MPF / UPF) specific operations
+ * needed in common root driver.
+ */
+struct xroot_pf_cb {
+	void (*xpc_hot_reset)(struct pci_dev *pdev);
+};
+
+int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
+void xroot_remove(void *root);
+bool xroot_wait_for_bringup(void *root);
+int xroot_add_vsec_node(void *root, char *dtb);
+int xroot_create_group(void *xr, char *dtb);
+int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
+void xroot_broadcast(void *root, enum xrt_events evt);
+
+#endif	/* _XRT_ROOT_H_ */
diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
new file mode 100644
index 000000000000..583a37c9d30c
--- /dev/null
+++ b/drivers/fpga/xrt/mgmt/root.c
@@ -0,0 +1,342 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Alveo Management Function Driver
+ *
+ * Copyright (C) 2020-2021 Xilinx, Inc.
+ *
+ * Authors:
+ *	Cheng Zhen <maxz@xilinx.com>
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+
+#include "xroot.h"
+#include "main-impl.h"
+#include "metadata.h"
+
+#define XMGMT_MODULE_NAME	"xmgmt"
+#define XMGMT_DRIVER_VERSION	"4.0.0"
+
+#define XMGMT_PDEV(xm)		((xm)->pdev)
+#define XMGMT_DEV(xm)		(&(XMGMT_PDEV(xm)->dev))
+#define xmgmt_err(xm, fmt, args...)	\
+	dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
+#define xmgmt_warn(xm, fmt, args...)	\
+	dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
+#define xmgmt_info(xm, fmt, args...)	\
+	dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
+#define xmgmt_dbg(xm, fmt, args...)	\
+	dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
+#define XMGMT_DEV_ID(_pcidev)			\
+	({ typeof(_pcidev) (pcidev) = (_pcidev);	\
+	((pci_domain_nr((pcidev)->bus) << 16) |	\
+	PCI_DEVID((pcidev)->bus->number, 0)); })
+
+static struct class *xmgmt_class;
+static const struct pci_device_id xmgmt_pci_ids[] = {
+	{ PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
+	{ PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
+	{ 0, }
+};
+
+struct xmgmt {
+	struct pci_dev *pdev;
+	void *root;
+
+	bool ready;
+};
+
+static int xmgmt_config_pci(struct xmgmt *xm)
+{
+	struct pci_dev *pdev = XMGMT_PDEV(xm);
+	int rc;
+
+	rc = pcim_enable_device(pdev);
+	if (rc < 0) {
+		xmgmt_err(xm, "failed to enable device: %d", rc);
+		return rc;
+	}
+
+	rc = pci_enable_pcie_error_reporting(pdev);
+	if (rc)
+		xmgmt_warn(xm, "failed to enable AER: %d", rc);
+
+	pci_set_master(pdev);
+
+	rc = pcie_get_readrq(pdev);
+	if (rc < 0) {
+		xmgmt_err(xm, "failed to read mrrs %d", rc);
+		return rc;
+	}
+	if (rc > 512) {
+		rc = pcie_set_readrq(pdev, 512);
+		if (rc) {
+			xmgmt_err(xm, "failed to force mrrs %d", rc);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+static int xmgmt_match_slot_and_save(struct device *dev, void *data)
+{
+	struct xmgmt *xm = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
+		pci_cfg_access_lock(pdev);
+		pci_save_state(pdev);
+	}
+
+	return 0;
+}
+
+static void xmgmt_pci_save_config_all(struct xmgmt *xm)
+{
+	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
+}
+
+static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
+{
+	struct xmgmt *xm = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
+		pci_restore_state(pdev);
+		pci_cfg_access_unlock(pdev);
+	}
+
+	return 0;
+}
+
+static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
+{
+	bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
+}
+
+static void xmgmt_root_hot_reset(struct pci_dev *pdev)
+{
+	struct xmgmt *xm = pci_get_drvdata(pdev);
+	struct pci_bus *bus;
+	u8 pci_bctl;
+	u16 pci_cmd, devctl;
+	int i, ret;
+
+	xmgmt_info(xm, "hot reset start");
+
+	xmgmt_pci_save_config_all(xm);
+
+	pci_disable_device(pdev);
+
+	bus = pdev->bus;
+
+	/*
+	 * When flipping the SBR bit, device can fall off the bus. This is
+	 * usually no problem at all so long as drivers are working properly
+	 * after SBR. However, some systems complain bitterly when the device
+	 * falls off the bus.
+	 * The quick solution is to temporarily disable the SERR reporting of
+	 * switch port during SBR.
+	 */
+
+	pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
+	pci_write_config_word(bus->self, PCI_COMMAND,
+			      (pci_cmd & ~PCI_COMMAND_SERR));
+	pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
+	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
+				   (devctl & ~PCI_EXP_DEVCTL_FERE));
+	pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
+	pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+
+	msleep(100);
+	pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+	ssleep(1);
+
+	pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
+	pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		xmgmt_err(xm, "failed to enable device, ret %d", ret);
+
+	for (i = 0; i < 300; i++) {
+		pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
+		if (pci_cmd != 0xffff)
+			break;
+		msleep(20);
+	}
+
+	xmgmt_info(xm, "waiting for %d ms", i * 20);
+	xmgmt_pci_restore_config_all(xm);
+	xmgmt_config_pci(xm);
+}
+
+static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
+{
+	char *dtb = NULL;
+	int ret;
+
+	ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
+	if (ret) {
+		xmgmt_err(xm, "create metadata failed, ret %d", ret);
+		goto failed;
+	}
+
+	ret = xroot_add_vsec_node(xm->root, dtb);
+	if (ret == -ENOENT) {
+		/*
+		 * We may be dealing with a MFG board.
+		 * Try vsec-golden which will bring up all hard-coded leaves
+		 * at hard-coded offsets.
+		 */
+		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_VSEC_GOLDEN);
+	} else if (ret == 0) {
+		ret = xroot_add_simple_node(xm->root, dtb, XRT_MD_NODE_MGMT_MAIN);
+	}
+	if (ret)
+		goto failed;
+
+	*root_dtb = dtb;
+	return 0;
+
+failed:
+	vfree(dtb);
+	return ret;
+}
+
+static ssize_t ready_show(struct device *dev,
+			  struct device_attribute *da,
+			  char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xmgmt *xm = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", xm->ready);
+}
+static DEVICE_ATTR_RO(ready);
+
+static struct attribute *xmgmt_root_attrs[] = {
+	&dev_attr_ready.attr,
+	NULL
+};
+
+static struct attribute_group xmgmt_root_attr_group = {
+	.attrs = xmgmt_root_attrs,
+};
+
+static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
+	.xpc_hot_reset = xmgmt_root_hot_reset,
+};
+
+static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int ret;
+	struct device *dev = &pdev->dev;
+	struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
+	char *dtb = NULL;
+
+	if (!xm)
+		return -ENOMEM;
+	xm->pdev = pdev;
+	pci_set_drvdata(pdev, xm);
+
+	ret = xmgmt_config_pci(xm);
+	if (ret)
+		goto failed;
+
+	ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
+	if (ret)
+		goto failed;
+
+	ret = xmgmt_create_root_metadata(xm, &dtb);
+	if (ret)
+		goto failed_metadata;
+
+	ret = xroot_create_group(xm->root, dtb);
+	vfree(dtb);
+	if (ret)
+		xmgmt_err(xm, "failed to create root group: %d", ret);
+
+	if (!xroot_wait_for_bringup(xm->root))
+		xmgmt_err(xm, "failed to bringup all groups");
+	else
+		xm->ready = true;
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
+	if (ret) {
+		/* Warning instead of failing the probe. */
+		xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
+	}
+
+	xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
+	xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
+	return 0;
+
+failed_metadata:
+	(void)xroot_remove(xm->root);
+failed:
+	pci_set_drvdata(pdev, NULL);
+	return ret;
+}
+
+static void xmgmt_remove(struct pci_dev *pdev)
+{
+	struct xmgmt *xm = pci_get_drvdata(pdev);
+
+	xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
+	sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
+	(void)xroot_remove(xm->root);
+	pci_disable_pcie_error_reporting(xm->pdev);
+	xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
+}
+
+static struct pci_driver xmgmt_driver = {
+	.name = XMGMT_MODULE_NAME,
+	.id_table = xmgmt_pci_ids,
+	.probe = xmgmt_probe,
+	.remove = xmgmt_remove,
+};
+
+static int __init xmgmt_init(void)
+{
+	int res = 0;
+
+	res = xmgmt_main_register_leaf();
+	if (res)
+		return res;
+
+	xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
+	if (IS_ERR(xmgmt_class))
+		return PTR_ERR(xmgmt_class);
+
+	res = pci_register_driver(&xmgmt_driver);
+	if (res) {
+		class_destroy(xmgmt_class);
+		return res;
+	}
+
+	return 0;
+}
+
+static __exit void xmgmt_exit(void)
+{
+	pci_unregister_driver(&xmgmt_driver);
+	class_destroy(xmgmt_class);
+	xmgmt_main_unregister_leaf();
+}
+
+module_init(xmgmt_init);
+module_exit(xmgmt_exit);
+
+MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
+MODULE_VERSION(XMGMT_DRIVER_VERSION);
+MODULE_AUTHOR("XRT Team <runtime@xilinx.com>");
+MODULE_DESCRIPTION("Xilinx Alveo management function driver");
+MODULE_LICENSE("GPL v2");