Message ID | 20240312080559.14904-2-kobayashi.da-06@fujitsu.com |
---|---|
State | New, archived |
Headers | show |
Series | Display cxl1.1 device link status | expand |
Kobayashi,Daisuke wrote: > This patch implements a process to output the link status information > of the CXL1.1 device to sysfs. The values of the registers related to > the link status are outputted into three separate files. > > In CXL1.1, the link status of the device is included in the RCRB mapped to > the memory mapped register area. This function accesses the address where > the device's RCRB is mapped. Per the comments on the cover letter I would rewrite this as: --- In CXL1.1, the link status of the device is included in the RCRB mapped to the memory mapped register area. Critically, that arrangement makes the link status and control registers invisible to existing PCI user tooling. Export those registers via sysfs with the expectation that PCI user tooling will alternatively look for these sysfs files when attempting to access these registers on CXL 1.1 endpoints. --- > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > --- > drivers/cxl/pci.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 193 insertions(+) > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 4fd1f207c84e..8f66f80a7bdc 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -781,6 +781,195 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, > return 0; > } > > +static u8 cxl_rcrb_get_pcie_cap_offset(void __iomem *addr){ > + u8 offset; > + u32 cap_hdr; > + > + offset = readb(addr + PCI_CAPABILITY_LIST); > + cap_hdr = readl(addr + offset); > + while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) { > + offset = (cap_hdr >> 8) & 0x000000ff; > + if (offset == 0) // End of capability list > + return 0; > + cap_hdr = readl(addr + offset); > + } > + return offset; The location is static, so there should be no need to lookup the location every time the sysfs attribute is accessed. I also think the values are static unless the link is reset. So my expectation is that these register values can just be read once and cached. Likely the best place to do this is inside __rcrb_to_component(). That routine already has the RCRB mapped and can be refactored to collect the the link status registers. Something like, rename __rcrb_to_component() to __rcrb_to_regs() and then have it fill in an updated cxl_rcrb_info(): diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 534e25e2f0a4..16c7472877b7 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -651,7 +651,12 @@ cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev) struct cxl_rcrb_info { resource_size_t base; + resource_size_t component_reg; + resource_size_t rcd_component_reg; u16 aer_cap; + u16 rcd_lnkctrl; + u16 rcd_lnkstatus; + u32 rcd_lnkcap; }; /** > + > +} > + > +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb) > +{ > + void __iomem *addr; > + u8 offset; > + u32 linkcap; > + > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > + return 0; Why is this a WARN_ON_ONCE()? In other words the caller should know ahead of time whether it has a valid RCRB base or not. ...oh, I see this is copying cxl_rcrb_to_aer(). I think that WARN_ON_ONCE() in that function is bogus as well. > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > + return 0; This is awkward because it may collide with usages of the RCRB, so that is another reason to cache the values. > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) > + goto out; > + > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > + if (offset) > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > + else > + goto out; > + > + linkcap = readl(addr + offset + PCI_EXP_LNKCAP); > + iounmap(addr); > +out: > + release_mem_region(rcrb, SZ_4K); > + > + return linkcap; > +} > + > +static ssize_t rcd_link_cap_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct cxl_port *port; > + struct cxl_dport *dport; > + struct device *parent = dev->parent; > + struct pci_dev *parent_pdev = to_pci_dev(parent); > + u32 linkcap; > + > + port = cxl_pci_find_port(parent_pdev, &dport); > + if (!port) > + return -EINVAL; > + > + linkcap = cxl_rcrb_to_linkcap(dev, dport->rcrb.base + SZ_4K); > + return sysfs_emit(buf, "%x\n", linkcap); This and the other ones should be using "%#x\n" so that the format of the number base is included. > +} > +static DEVICE_ATTR_RO(rcd_link_cap); > + > +static u16 cxl_rcrb_to_linkctr(struct device *dev, resource_size_t rcrb) > +{ > + void __iomem *addr; > + u8 offset; > + u16 linkctrl; > + > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > + return 0; > + > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > + return 0; ...the other benefit of centralizing this code is that we do not end up with multiple copies of similar, but slightly different code. > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) > + goto out; > + > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > + if (offset) > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > + else > + goto out; > + > + linkctrl = readw(addr + offset + PCI_EXP_LNKCTL); > + iounmap(addr); > +out: > + release_mem_region(rcrb, SZ_4K); > + > + return linkctrl; > +} > + > +static ssize_t rcd_link_ctrl_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct cxl_port *port; > + struct cxl_dport *dport; > + struct device *parent = dev->parent; > + struct pci_dev *parent_pdev = to_pci_dev(parent); > + u16 linkctrl; > + > + port = cxl_pci_find_port(parent_pdev, &dport); > + if (!port) > + return -EINVAL; > + > + > + linkctrl = cxl_rcrb_to_linkctr(dev, dport->rcrb.base + SZ_4K); > + > + return sysfs_emit(buf, "%x\n", linkctrl); > +} > +static DEVICE_ATTR_RO(rcd_link_ctrl); > + > +static u16 cxl_rcrb_to_linkstatus(struct device *dev, resource_size_t rcrb) > +{ > + void __iomem *addr; > + u8 offset; > + u16 linksta; > + > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > + return 0; > + > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > + return 0; > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) > + goto out; > + > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > + if (offset) > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > + else > + goto out; > + > + linksta = readw(addr + offset + PCI_EXP_LNKSTA); > + iounmap(addr); > +out: > + release_mem_region(rcrb, SZ_4K); > + > + return linksta; > +} > + > +static ssize_t rcd_link_status_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct cxl_port *port; > + struct cxl_dport *dport; > + struct device *parent = dev->parent; > + struct pci_dev *parent_pdev = to_pci_dev(parent); > + u16 linkstatus; > + > + port = cxl_pci_find_port(parent_pdev, &dport); > + if (!port) > + return -EINVAL; > + > + linkstatus = cxl_rcrb_to_linkstatus(dev, dport->rcrb.base + SZ_4K); > + > + return sysfs_emit(buf, "%x\n", linkstatus); > +} > +static DEVICE_ATTR_RO(rcd_link_status); > + > +static struct attribute *cxl_rcd_attrs[] = { > + &dev_attr_rcd_link_cap.attr, > + &dev_attr_rcd_link_ctrl.attr, > + &dev_attr_rcd_link_status.attr, > + NULL > +}; > + > +static umode_t cxl_rcd_visible(struct kobject *kobj, > + struct attribute *a, int n) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct pci_dev *pdev = to_pci_dev(dev); > + > + if (is_cxl_restricted(pdev)) > + return a->mode; > + > + return 0; > +} > + > +static struct attribute_group cxl_rcd_group = { > + .attrs = cxl_rcd_attrs, > + .is_visible = cxl_rcd_visible, > +}; > + > +__ATTRIBUTE_GROUPS(cxl_rcd); > + > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); > @@ -806,6 +995,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(mds)) > return PTR_ERR(mds); > cxlds = &mds->cxlds; > + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); > + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); > + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); No need to manually call device_create_file() when the attribute group is already registered below. I am surprised you did not get duplicate sysfs file warnings when registering these files twice. > pci_set_drvdata(pdev, cxlds); > > cxlds->rcd = is_cxl_restricted(pdev); > @@ -967,6 +1159,7 @@ static struct pci_driver cxl_pci_driver = { > .err_handler = &cxl_error_handlers, > .driver = { > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > + .dev_groups = cxl_rcd_groups, > }, > }; > > -- > 2.43.0 > >
Dan Williams wrote: > Kobayashi,Daisuke wrote: > > This patch implements a process to output the link status information > > of the CXL1.1 device to sysfs. The values of the registers related to > > the link status are outputted into three separate files. > > > > In CXL1.1, the link status of the device is included in the RCRB > > mapped to the memory mapped register area. This function accesses the > > address where the device's RCRB is mapped. > > Per the comments on the cover letter I would rewrite this as: > > --- > In CXL1.1, the link status of the device is included in the RCRB mapped to the > memory mapped register area. Critically, that arrangement makes the link > status and control registers invisible to existing PCI user tooling. > > Export those registers via sysfs with the expectation that PCI user tooling will > alternatively look for these sysfs files when attempting to access these > registers on CXL 1.1 endpoints. > --- > This message will be updated in the next patch. Thank you for your helpful feedback. > > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > > --- > > drivers/cxl/pci.c | 193 > > ++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 193 insertions(+) > > > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index > > 4fd1f207c84e..8f66f80a7bdc 100644 > > --- a/drivers/cxl/pci.c > > +++ b/drivers/cxl/pci.c > > @@ -781,6 +781,195 @@ static int cxl_event_config(struct pci_host_bridge > *host_bridge, > > return 0; > > } > > > > +static u8 cxl_rcrb_get_pcie_cap_offset(void __iomem *addr){ > > + u8 offset; > > + u32 cap_hdr; > > + > > + offset = readb(addr + PCI_CAPABILITY_LIST); > > + cap_hdr = readl(addr + offset); > > + while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) { > > + offset = (cap_hdr >> 8) & 0x000000ff; > > + if (offset == 0) // End of capability list > > + return 0; > > + cap_hdr = readl(addr + offset); > > + } > > + return offset; > > The location is static, so there should be no need to lookup the location every > time the sysfs attribute is accessed. I also think the values are static unless the > link is reset. So my expectation is that these register values can just be read > once and cached. > > Likely the best place to do this is inside __rcrb_to_component(). That routine > already has the RCRB mapped and can be refactored to collect the the link > status registers. Something like, rename __rcrb_to_component() to > __rcrb_to_regs() and then have it fill in an updated cxl_rcrb_info(): > Add processing to__rcrb_to_component() to change the implementation to cache these values. As you say, I think these values are static, so it's not efficient to access the RCRB every time you access the sysfs attribute. > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index > 534e25e2f0a4..16c7472877b7 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -651,7 +651,12 @@ cxl_find_dport_by_dev(struct cxl_port *port, const > struct device *dport_dev) > > struct cxl_rcrb_info { > resource_size_t base; > + resource_size_t component_reg; > + resource_size_t rcd_component_reg; > u16 aer_cap; > + u16 rcd_lnkctrl; > + u16 rcd_lnkstatus; > + u32 rcd_lnkcap; > }; > > /** > > > + > > +} > > + > > +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t > > +rcrb) { > > + void __iomem *addr; > > + u8 offset; > > + u32 linkcap; > > + > > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > > + return 0; > > Why is this a WARN_ON_ONCE()? In other words the caller should know ahead > of time whether it has a valid RCRB base or not. > > ...oh, I see this is copying cxl_rcrb_to_aer(). I think that > WARN_ON_ONCE() in that function is bogus as well. > > Yes, as you mentioned, I have copied cxl_rcrb_to_aer(). However, it seems to be an improper implementation. In the next patch, I will modify the implementation to cache the value, and consequently, this part of the code will be removed. > > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > > + return 0; > > This is awkward because it may collide with usages of the RCRB, so that is > another reason to cache the values. > > > + > > + addr = ioremap(rcrb, SZ_4K); > > + if (!addr) > > + goto out; > > + > > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > > + if (offset) > > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > > + else > > + goto out; > > + > > + linkcap = readl(addr + offset + PCI_EXP_LNKCAP); > > + iounmap(addr); > > +out: > > + release_mem_region(rcrb, SZ_4K); > > + > > + return linkcap; > > +} > > + > > +static ssize_t rcd_link_cap_show(struct device *dev, > > + struct device_attribute *attr, char *buf) { > > + struct cxl_port *port; > > + struct cxl_dport *dport; > > + struct device *parent = dev->parent; > > + struct pci_dev *parent_pdev = to_pci_dev(parent); > > + u32 linkcap; > > + > > + port = cxl_pci_find_port(parent_pdev, &dport); > > + if (!port) > > + return -EINVAL; > > + > > + linkcap = cxl_rcrb_to_linkcap(dev, dport->rcrb.base + SZ_4K); > > + return sysfs_emit(buf, "%x\n", linkcap); > > This and the other ones should be using "%#x\n" so that the format of the > number base is included. > I will fix them. Thank you. > > +} > > +static DEVICE_ATTR_RO(rcd_link_cap); > > + > > +static u16 cxl_rcrb_to_linkctr(struct device *dev, resource_size_t > > +rcrb) { > > + void __iomem *addr; > > + u8 offset; > > + u16 linkctrl; > > + > > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > > + return 0; > > + > > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > > + return 0; > > ...the other benefit of centralizing this code is that we do not end up with > multiple copies of similar, but slightly different code. > Are you saying that caching values simplifies the show function? Then I think you're right. I will change that the value should be cached in the same way as the component register. > > + > > + addr = ioremap(rcrb, SZ_4K); > > + if (!addr) > > + goto out; > > + > > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > > + if (offset) > > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > > + else > > + goto out; > > + > > + linkctrl = readw(addr + offset + PCI_EXP_LNKCTL); > > + iounmap(addr); > > +out: > > + release_mem_region(rcrb, SZ_4K); > > + > > + return linkctrl; > > +} > > + > > +static ssize_t rcd_link_ctrl_show(struct device *dev, > > + struct device_attribute *attr, char *buf) { > > + struct cxl_port *port; > > + struct cxl_dport *dport; > > + struct device *parent = dev->parent; > > + struct pci_dev *parent_pdev = to_pci_dev(parent); > > + u16 linkctrl; > > + > > + port = cxl_pci_find_port(parent_pdev, &dport); > > + if (!port) > > + return -EINVAL; > > + > > + > > + linkctrl = cxl_rcrb_to_linkctr(dev, dport->rcrb.base + SZ_4K); > > + > > + return sysfs_emit(buf, "%x\n", linkctrl); } static > > +DEVICE_ATTR_RO(rcd_link_ctrl); > > + > > +static u16 cxl_rcrb_to_linkstatus(struct device *dev, resource_size_t > > +rcrb) { > > + void __iomem *addr; > > + u8 offset; > > + u16 linksta; > > + > > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > > + return 0; > > + > > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > > + return 0; > > + > > + addr = ioremap(rcrb, SZ_4K); > > + if (!addr) > > + goto out; > > + > > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > > + if (offset) > > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > > + else > > + goto out; > > + > > + linksta = readw(addr + offset + PCI_EXP_LNKSTA); > > + iounmap(addr); > > +out: > > + release_mem_region(rcrb, SZ_4K); > > + > > + return linksta; > > +} > > + > > +static ssize_t rcd_link_status_show(struct device *dev, > > + struct device_attribute *attr, char *buf) { > > + struct cxl_port *port; > > + struct cxl_dport *dport; > > + struct device *parent = dev->parent; > > + struct pci_dev *parent_pdev = to_pci_dev(parent); > > + u16 linkstatus; > > + > > + port = cxl_pci_find_port(parent_pdev, &dport); > > + if (!port) > > + return -EINVAL; > > + > > + linkstatus = cxl_rcrb_to_linkstatus(dev, dport->rcrb.base + SZ_4K); > > + > > + return sysfs_emit(buf, "%x\n", linkstatus); } static > > +DEVICE_ATTR_RO(rcd_link_status); > > + > > +static struct attribute *cxl_rcd_attrs[] = { > > + &dev_attr_rcd_link_cap.attr, > > + &dev_attr_rcd_link_ctrl.attr, > > + &dev_attr_rcd_link_status.attr, > > + NULL > > +}; > > + > > +static umode_t cxl_rcd_visible(struct kobject *kobj, > > + struct attribute *a, int n) > > +{ > > + struct device *dev = kobj_to_dev(kobj); > > + struct pci_dev *pdev = to_pci_dev(dev); > > + > > + if (is_cxl_restricted(pdev)) > > + return a->mode; > > + > > + return 0; > > +} > > + > > +static struct attribute_group cxl_rcd_group = { > > + .attrs = cxl_rcd_attrs, > > + .is_visible = cxl_rcd_visible, > > +}; > > + > > +__ATTRIBUTE_GROUPS(cxl_rcd); > > + > > static int cxl_pci_probe(struct pci_dev *pdev, const struct > > pci_device_id *id) { > > struct pci_host_bridge *host_bridge = > > pci_find_host_bridge(pdev->bus); @@ -806,6 +995,9 @@ static int > cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > if (IS_ERR(mds)) > > return PTR_ERR(mds); > > cxlds = &mds->cxlds; > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); > > No need to manually call device_create_file() when the attribute group is > already registered below. I am surprised you did not get duplicate sysfs file > warnings when registering these files twice. > Thank you for pointing it out. Remove these calls. > > pci_set_drvdata(pdev, cxlds); > > > > cxlds->rcd = is_cxl_restricted(pdev); @@ -967,6 +1159,7 @@ static > > struct pci_driver cxl_pci_driver = { > > .err_handler = &cxl_error_handlers, > > .driver = { > > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > > + .dev_groups = cxl_rcd_groups, > > }, > > }; > > > > -- > > 2.43.0 > > > > >
> Dan Williams wrote: > > Kobayashi,Daisuke wrote: > > > +static struct attribute_group cxl_rcd_group = { > > > + .attrs = cxl_rcd_attrs, > > > + .is_visible = cxl_rcd_visible, > > > +}; > > > + > > > +__ATTRIBUTE_GROUPS(cxl_rcd); > > > + > > > static int cxl_pci_probe(struct pci_dev *pdev, const struct > > > pci_device_id *id) { > > > struct pci_host_bridge *host_bridge = > > > pci_find_host_bridge(pdev->bus); @@ -806,6 +995,9 @@ static int > > cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > if (IS_ERR(mds)) > > > return PTR_ERR(mds); > > > cxlds = &mds->cxlds; > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); > > > > No need to manually call device_create_file() when the attribute group is > > already registered below. I am surprised you did not get duplicate sysfs file > > warnings when registering these files twice. > > > > Thank you for pointing it out. Remove these calls. > If you are aware of the cause, I would appreciate your insight. In my environment, when I removed this device_create_file(), the file was not generated in sysfs. Therefore, I have not been able to remove this manual procedure at the moment. Is there a possibility that simply registering with struct pci_driver.driver.groups will not generate a sysfs file? > > > pci_set_drvdata(pdev, cxlds); > > > > > > cxlds->rcd = is_cxl_restricted(pdev); @@ -967,6 +1159,7 @@ static > > > struct pci_driver cxl_pci_driver = { > > > .err_handler = &cxl_error_handlers, > > > .driver = { > > > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > > > + .dev_groups = cxl_rcd_groups, > > > }, > > > }; > > > > > > -- > > > 2.43.0 > > > > > > > > >
Kobayashi,Daisuke wrote: > > Dan Williams wrote: > > > Kobayashi,Daisuke wrote: > > > > +static struct attribute_group cxl_rcd_group = { > > > > + .attrs = cxl_rcd_attrs, > > > > + .is_visible = cxl_rcd_visible, > > > > +}; > > > > + > > > > +__ATTRIBUTE_GROUPS(cxl_rcd); > > > > + > > > > static int cxl_pci_probe(struct pci_dev *pdev, const struct > > > > pci_device_id *id) { > > > > struct pci_host_bridge *host_bridge = > > > > pci_find_host_bridge(pdev->bus); @@ -806,6 +995,9 @@ static int > > > cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > > if (IS_ERR(mds)) > > > > return PTR_ERR(mds); > > > > cxlds = &mds->cxlds; > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); > > > > > > No need to manually call device_create_file() when the attribute group is > > > already registered below. I am surprised you did not get duplicate sysfs file > > > warnings when registering these files twice. > > > > > > > Thank you for pointing it out. Remove these calls. > > > If you are aware of the cause, I would appreciate your insight. > In my environment, when I removed this device_create_file(), > the file was not generated in sysfs. Therefore, I have not been > able to remove this manual procedure at the moment. Is there a > possibility that simply registering with > struct pci_driver.driver.groups will not generate a sysfs file? > I would like to report on some additional findings. The process of registering cxl_rcd_groups to struct pci_driver.driver.dev_groups seems to not generate a file in sysfs when looking at the contents of the module_pci_driver() macro. For this feature, I think it would be best to output the values to a directory of /sys/bus/pci/devices/<pci-addr>/. To output to this directory, the attribute would need to be registered to pci_dev.dev. My current understanding is that the best way to do this would be to register the attribute with device_add_groups(&pdev->dev, cxl_rcd_groups) on probe and remove the files with device_remove_groups(&pdev->dev, cxl_rcd_groups) on remove. I have attached the code below. Is my usage of probe/remove correct? If so, I will update and resubmit the patch with the following code. static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -812,6 +885,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; + rc = device_add_groups(&pdev->dev, cxl_rcd_groups); + if (rc) + dev_warn(&pdev->dev, "Couldn't make rcd_groups (%d)\n", rc); pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); @@ -964,10 +1040,18 @@ static const struct pci_error_handlers cxl_error_handlers = { .cor_error_detected = cxl_cor_error_detected, }; +static void cxl_pci_remove(struct pci_dev *pdev) +{ + if (is_cxl_restricted(pdev)) + device_remove_groups(&pdev->dev, cxl_rcd_groups); +} + static struct pci_driver cxl_pci_driver = { .name = KBUILD_MODNAME, .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, + .remove = cxl_pci_remove, .err_handler = &cxl_error_handlers, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, --
On Wed, 3 Apr 2024 09:40:27 +0000 "Daisuke Kobayashi (Fujitsu)" <kobayashi.da-06@fujitsu.com> wrote: > > Dan Williams wrote: > > > Kobayashi,Daisuke wrote: > > > > +static struct attribute_group cxl_rcd_group = { > > > > + .attrs = cxl_rcd_attrs, > > > > + .is_visible = cxl_rcd_visible, > > > > +}; > > > > + > > > > +__ATTRIBUTE_GROUPS(cxl_rcd); > > > > + > > > > static int cxl_pci_probe(struct pci_dev *pdev, const struct > > > > pci_device_id *id) { > > > > struct pci_host_bridge *host_bridge = > > > > pci_find_host_bridge(pdev->bus); @@ -806,6 +995,9 @@ static int > > > cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > > if (IS_ERR(mds)) > > > > return PTR_ERR(mds); > > > > cxlds = &mds->cxlds; > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); > > > > + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); > > > > > > No need to manually call device_create_file() when the attribute group is > > > already registered below. I am surprised you did not get duplicate sysfs file > > > warnings when registering these files twice. > > > > > > > Thank you for pointing it out. Remove these calls. > > > If you are aware of the cause, I would appreciate your insight. > In my environment, when I removed this device_create_file(), > the file was not generated in sysfs. Therefore, I have not been > able to remove this manual procedure at the moment. Is there a > possibility that simply registering with > struct pci_driver.driver.groups will not generate a sysfs file? > > > > > pci_set_drvdata(pdev, cxlds); > > > > > > > > cxlds->rcd = is_cxl_restricted(pdev); @@ -967,6 +1159,7 @@ static > > > > struct pci_driver cxl_pci_driver = { > > > > .err_handler = &cxl_error_handlers, > > > > .driver = { > > > > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > > > > + .dev_groups = cxl_rcd_groups, Odd though it may seem, try setting .dev_groups in the outer structure not the inner one. .err_handler = &.... .dev_groups = cxl_rcd_groups, .driver = { ... }, Similar to: https://elixir.bootlin.com/linux/latest/source/drivers/crypto/ccp/sp-pci.c#L592 For reasons I don't follow, __pci_register_driver() overrides the internal one. Some ancient bit of code migration that never finished? https://elixir.bootlin.com/linux/latest/source/drivers/pci/pci-driver.c#L1447 I did some digging. https://lore.kernel.org/all/20190731124349.4474-2-gregkh@linuxfoundation.org/ So this got added to the driver core fairly recently (only 4 years ago ;) The the dev_groups was added to pci in https://lore.kernel.org/all/20210512142648.666476-8-andrey.grodzovsky@amd.com/ I'm not sure why the bounce via pci_driver is needed though. Greg, looks like this came from usb originally, can you recall the reasoning? > > > > }, > > > > }; > > > > > > > > -- > > > > 2.43.0 > > > > > > > > > > > > > > >
Daisuke Kobayashi (Fujitsu) wrote: [..] > > Thank you for pointing it out. Remove these calls. > > > If you are aware of the cause, I would appreciate your insight. > In my environment, when I removed this device_create_file(), > the file was not generated in sysfs. Therefore, I have not been > able to remove this manual procedure at the moment. Is there a > possibility that simply registering with > struct pci_driver.driver.groups will not generate a sysfs file? Be careful, are you assigning cxl_rcd_groups to "pci_driver.driver.groups", or "pci_driver.driver.dev_groups"? "dev_groups" adds them to the PCI device object, "groups" adds them to the *driver* object (/sys/bus/pci/drivers/cxl_pci/$files).
Daisuke Kobayashi (Fujitsu) wrote: [..] > I would like to report on some additional findings. > The process of registering cxl_rcd_groups to struct pci_driver.driver.dev_groups seems > to not generate a file in sysfs when looking at the contents of the module_pci_driver() macro. Oh, apologies, and thanks for taking a deeper look. The failure is because my suggested example led you astray. I suggested this: diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2ff361e756d6..eec04f103aa8 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -971,6 +971,7 @@ static struct pci_driver cxl_pci_driver = { .err_handler = &cxl_error_handlers, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .dev_groups = cxl_rcd_groups, }, }; ...the correct place to put it is here: @@ -969,6 +969,7 @@ static struct pci_driver cxl_pci_driver = { .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, .err_handler = &cxl_error_handlers, + .dev_groups = cxl_rcd_groups, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, ...otherwise __pci_register_driver() will overwrite it. This is a subtle bug given probe_type is directly initialized in .driver. > For this feature, I think it would be best to output the values to a directory > of /sys/bus/pci/devices/<pci-addr>/. To output to this directory, the attribute would > need to be registered to pci_dev.dev. > My current understanding is that the best way to do this would be to register the attribute > with device_add_groups(&pdev->dev, cxl_rcd_groups) on probe and remove the files with No, the dynamic sysfs registration APIs should be avoided when possible. The above fix is what you need.
Dan Williams wrote: > Daisuke Kobayashi (Fujitsu) wrote: > [..] > > I would like to report on some additional findings. > > The process of registering cxl_rcd_groups to struct > > pci_driver.driver.dev_groups seems to not generate a file in sysfs when > looking at the contents of the module_pci_driver() macro. > > Oh, apologies, and thanks for taking a deeper look. The failure is because my > suggested example led you astray. I suggested this: > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index > 2ff361e756d6..eec04f103aa8 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -971,6 +971,7 @@ static struct pci_driver cxl_pci_driver = { > .err_handler = &cxl_error_handlers, > .driver = { > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > + .dev_groups = cxl_rcd_groups, > }, > }; > > > ...the correct place to put it is here: > > @@ -969,6 +969,7 @@ static struct pci_driver cxl_pci_driver = { > .id_table = cxl_mem_pci_tbl, > .probe = cxl_pci_probe, > .err_handler = &cxl_error_handlers, > + .dev_groups = cxl_rcd_groups, > .driver = { > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > }, > > > ...otherwise __pci_register_driver() will overwrite it. This is a subtle bug given > probe_type is directly initialized in .driver. > > > For this feature, I think it would be best to output the values to a > > directory of /sys/bus/pci/devices/<pci-addr>/. To output to this > > directory, the attribute would need to be registered to pci_dev.dev. > > My current understanding is that the best way to do this would be to > > register the attribute with device_add_groups(&pdev->dev, > > cxl_rcd_groups) on probe and remove the files with > > No, the dynamic sysfs registration APIs should be avoided when possible. > The above fix is what you need. Thank you. With your guidance and the hint in the previous email, I now understand the idea of "groups" and "dev_groups". I see that, by registering "cxl_rcd_groups" with "pci_driver.dev_groups", files are created in device's sysfs when the device is attached. I will update the patch to reflect this.
On Tue, Mar 12, 2024 at 05:05:57PM +0900, Kobayashi,Daisuke wrote: > This patch implements a process to output the link status information > of the CXL1.1 device to sysfs. The values of the registers related to > the link status are outputted into three separate files. > > In CXL1.1, the link status of the device is included in the RCRB mapped to > the memory mapped register area. This function accesses the address where > the device's RCRB is mapped. > > Spurious blank line in the commit log. Perhaps include the names of the sysfs files? And a hint of what they mean? I think it's also conventional for the patch to add entries to Documentation/ABI/... to show how to use the new files. > +static u8 cxl_rcrb_get_pcie_cap_offset(void __iomem *addr){ Opening brace would typically be on the next line. > + u8 offset; > + u32 cap_hdr; > + > + offset = readb(addr + PCI_CAPABILITY_LIST); > + cap_hdr = readl(addr + offset); > + while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) { > + offset = (cap_hdr >> 8) & 0x000000ff; > + if (offset == 0) // End of capability list > + return 0; > + cap_hdr = readl(addr + offset); > + } > + return offset; Possibly mimic the name and structure of pci_find_capability(), in particular, the loop structure of __pci_find_next_cap_ttl(). > + Spurious blank line. > +} > + > +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb) > +{ > + void __iomem *addr; > + u8 offset; > + u32 linkcap; > + > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > + return 0; > + > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > + return 0; > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) > + goto out; > + > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > + if (offset) > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > + else > + goto out; > + > + linkcap = readl(addr + offset + PCI_EXP_LNKCAP); > + iounmap(addr); > +out: > + release_mem_region(rcrb, SZ_4K); > + > + return linkcap; > +} > +static u16 cxl_rcrb_to_linkctr(struct device *dev, resource_size_t rcrb) Why name this "linkctr" when other references here use "linkctrl"? > +{ > + void __iomem *addr; > + u8 offset; > + u16 linkctrl; > + > + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) > + return 0; > + > + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) > + return 0; > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) > + goto out; > + > + offset = cxl_rcrb_get_pcie_cap_offset(addr); > + if (offset) > + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); > + else > + goto out; > + > + linkctrl = readw(addr + offset + PCI_EXP_LNKCTL); > + iounmap(addr); > +out: > + release_mem_region(rcrb, SZ_4K); > + > + return linkctrl; There's a lot of duplicated boilerplate here between cxl_rcrb_to_linkcap(), cxl_rcrb_to_linkctr(), cxl_rcrb_to_linkstatus(). It also seems like a lot of repeated work to search for the PCIe Cap, ioremap, tear down, etc., for each file, every time it is read. I assume most readers will be interested in all three items at the same time. > +static umode_t cxl_rcd_visible(struct kobject *kobj, > + struct attribute *a, int n) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct pci_dev *pdev = to_pci_dev(dev); > + > + if (is_cxl_restricted(pdev)) Not related to *this* patch, but I can't connect the dots between the "is_cxl_restricted()" name, the meaning of "restricted", and the "CXL memory expander class code" mentioned in the is_cxl_restricted() function comment. It doesn't check the "class code". It's not obvious why this applies to RCiEPs but not other endpoints. No doubt all obvious to the CXL-initiated, which I am not. Bjorn
On Tue, Apr 09, 2024 at 09:59:35AM -0500, Bjorn Helgaas wrote: > On Tue, Mar 12, 2024 at 05:05:57PM +0900, Kobayashi,Daisuke wrote: > > This patch implements a process to output the link status information > > of the CXL1.1 device to sysfs. The values of the registers related to > > the link status are outputted into three separate files. > > > > In CXL1.1, the link status of the device is included in the RCRB mapped to > > the memory mapped register area. This function accesses the address where > > the device's RCRB is mapped. > > > > Sorry for commenting on v3 when you already posted v4.
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 4fd1f207c84e..8f66f80a7bdc 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -781,6 +781,195 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, return 0; } +static u8 cxl_rcrb_get_pcie_cap_offset(void __iomem *addr){ + u8 offset; + u32 cap_hdr; + + offset = readb(addr + PCI_CAPABILITY_LIST); + cap_hdr = readl(addr + offset); + while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) { + offset = (cap_hdr >> 8) & 0x000000ff; + if (offset == 0) // End of capability list + return 0; + cap_hdr = readl(addr + offset); + } + return offset; + +} + +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u8 offset; + u32 linkcap; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + offset = cxl_rcrb_get_pcie_cap_offset(addr); + if (offset) + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); + else + goto out; + + linkcap = readl(addr + offset + PCI_EXP_LNKCAP); + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return linkcap; +} + +static ssize_t rcd_link_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port; + struct cxl_dport *dport; + struct device *parent = dev->parent; + struct pci_dev *parent_pdev = to_pci_dev(parent); + u32 linkcap; + + port = cxl_pci_find_port(parent_pdev, &dport); + if (!port) + return -EINVAL; + + linkcap = cxl_rcrb_to_linkcap(dev, dport->rcrb.base + SZ_4K); + return sysfs_emit(buf, "%x\n", linkcap); +} +static DEVICE_ATTR_RO(rcd_link_cap); + +static u16 cxl_rcrb_to_linkctr(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u8 offset; + u16 linkctrl; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + offset = cxl_rcrb_get_pcie_cap_offset(addr); + if (offset) + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); + else + goto out; + + linkctrl = readw(addr + offset + PCI_EXP_LNKCTL); + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return linkctrl; +} + +static ssize_t rcd_link_ctrl_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port; + struct cxl_dport *dport; + struct device *parent = dev->parent; + struct pci_dev *parent_pdev = to_pci_dev(parent); + u16 linkctrl; + + port = cxl_pci_find_port(parent_pdev, &dport); + if (!port) + return -EINVAL; + + + linkctrl = cxl_rcrb_to_linkctr(dev, dport->rcrb.base + SZ_4K); + + return sysfs_emit(buf, "%x\n", linkctrl); +} +static DEVICE_ATTR_RO(rcd_link_ctrl); + +static u16 cxl_rcrb_to_linkstatus(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u8 offset; + u16 linksta; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + offset = cxl_rcrb_get_pcie_cap_offset(addr); + if (offset) + dev_dbg(dev, "found PCIe capability (0x%x)\n", offset); + else + goto out; + + linksta = readw(addr + offset + PCI_EXP_LNKSTA); + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return linksta; +} + +static ssize_t rcd_link_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port; + struct cxl_dport *dport; + struct device *parent = dev->parent; + struct pci_dev *parent_pdev = to_pci_dev(parent); + u16 linkstatus; + + port = cxl_pci_find_port(parent_pdev, &dport); + if (!port) + return -EINVAL; + + linkstatus = cxl_rcrb_to_linkstatus(dev, dport->rcrb.base + SZ_4K); + + return sysfs_emit(buf, "%x\n", linkstatus); +} +static DEVICE_ATTR_RO(rcd_link_status); + +static struct attribute *cxl_rcd_attrs[] = { + &dev_attr_rcd_link_cap.attr, + &dev_attr_rcd_link_ctrl.attr, + &dev_attr_rcd_link_status.attr, + NULL +}; + +static umode_t cxl_rcd_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (is_cxl_restricted(pdev)) + return a->mode; + + return 0; +} + +static struct attribute_group cxl_rcd_group = { + .attrs = cxl_rcd_attrs, + .is_visible = cxl_rcd_visible, +}; + +__ATTRIBUTE_GROUPS(cxl_rcd); + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -806,6 +995,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; + device_create_file(&pdev->dev, &dev_attr_rcd_link_cap); + device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl); + device_create_file(&pdev->dev, &dev_attr_rcd_link_status); pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); @@ -967,6 +1159,7 @@ static struct pci_driver cxl_pci_driver = { .err_handler = &cxl_error_handlers, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .dev_groups = cxl_rcd_groups, }, };
This patch implements a process to output the link status information of the CXL1.1 device to sysfs. The values of the registers related to the link status are outputted into three separate files. In CXL1.1, the link status of the device is included in the RCRB mapped to the memory mapped register area. This function accesses the address where the device's RCRB is mapped. Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> --- drivers/cxl/pci.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+)