diff mbox series

[RFC,4/9] cxl/mem: Map memory device registers

Message ID 20201111054356.793390-5-ben.widawsky@intel.com (mailing list archive)
State RFC, archived
Headers show
Series CXL 2.0 Support | expand

Commit Message

Ben Widawsky Nov. 11, 2020, 5:43 a.m. UTC
All the necessary bits are initialized in order to find and map the
register space for CXL Memory Devices. This is accomplished by using the
Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to
use, and how much of an offset from that BAR should be added.

If the memory device registers are found and mapped, a new internal data
structure tracking device state is allocated.

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
 drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++----
 drivers/cxl/pci.h |  6 +++++
 2 files changed, 69 insertions(+), 5 deletions(-)

Comments

Bjorn Helgaas Nov. 13, 2020, 6:17 p.m. UTC | #1
On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote:
> All the necessary bits are initialized in order to find and map the
> register space for CXL Memory Devices. This is accomplished by using the
> Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to
> use, and how much of an offset from that BAR should be added.

"Initialize the necessary bits ..." to use the usual imperative
sentence structure, as you did in the subject.

> If the memory device registers are found and mapped a new internal data
> structure tracking device state is allocated.

"Allocate device state if we find device registers" or similar.

> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> ---
>  drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++----
>  drivers/cxl/pci.h |  6 +++++
>  2 files changed, 69 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index aa7d881fa47b..8d9b9ab6c5ea 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -7,9 +7,49 @@
>  #include "pci.h"
>  
>  struct cxl_mem {
> +	struct pci_dev *pdev;
>  	void __iomem *regs;
>  };
>  
> +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cxl_mem *cxlm;
> +	void __iomem *regs;
> +	u64 offset;
> +	u8 bar;
> +	int rc;
> +
> +	offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000);
> +	bar = reg_lo & 0x7;
> +
> +	/* Basic sanity check that BAR is big enough */
> +	if (pci_resource_len(pdev, bar) < offset) {
> +		dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n",
> +				bar, &pdev->resource[bar], (unsigned long long) offset);

s/bar/BAR/

> +		return ERR_PTR(-ENXIO);
> +	}
> +
> +	rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev));
> +	if (rc != 0) {
> +		dev_err(dev, "failed to map registers\n");
> +		return ERR_PTR(-ENXIO);
> +	}
> +
> +	cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
> +	if (!cxlm) {
> +		dev_err(dev, "No memory available\n");
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	regs = pcim_iomap_table(pdev)[bar];
> +	cxlm->pdev = pdev;
> +	cxlm->regs = regs + offset;
> +
> +	dev_dbg(dev, "Mapped CXL Memory Device resource\n");
> +	return cxlm;
> +}
> +
>  static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
>  {
>  	int pos;
> @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
>  
>  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
> +	struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
>  	struct device *dev = &pdev->dev;
> -	struct cxl_mem *cxlm;

The order was better before ("dev", then "cxlm").  Oh, I suppose this
is a "reverse Christmas tree" thing.

> -	int rc, regloc;
> +	int rc, regloc, i;
>  
>  	rc = cxl_bus_prepared(pdev);
>  	if (rc != 0) {
> @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		return rc;
>  	}
>  
> +	rc = pcim_enable_device(pdev);
> +	if (rc)
> +		return rc;
> +
>  	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC);
>  	if (!regloc) {
>  		dev_err(dev, "register location dvsec not found\n");
>  		return -ENXIO;
>  	}
> +	regloc += 0xc; /* Skip DVSEC + reserved fields */
> +
> +	for (i = regloc; i < regloc + 0x24; i += 8) {
> +		u32 reg_lo, reg_hi;
> +
> +		pci_read_config_dword(pdev, i, &reg_lo);
> +		pci_read_config_dword(pdev, i + 4, &reg_hi);
> +
> +		if (CXL_REGLOG_IS_MEMDEV(reg_lo)) {
> +			cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> +			break;
> +		}
> +	}
> +
> +	if (IS_ERR(cxlm))
> +		return -ENXIO;

I think this would be easier to read if cxl_mem_create() returned NULL
on failure (it prints error messages and we throw away
-ENXIO/-ENOMEM distinction here anyway) so you could do:

  struct cxl_mem *cxlm = NULL;

  for (...) {
    if (...) {
      cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
      break;
    }
  }

  if (!cxlm)
    return -ENXIO;  /* -ENODEV might be more natural? */

> -	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
> -	if (!cxlm)
> -		return -ENOMEM;
> +	pci_set_drvdata(pdev, cxlm);
>  
>  	return 0;
>  }
> diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> index beb03921e6da..be87f62e9132 100644
> --- a/drivers/cxl/pci.h
> +++ b/drivers/cxl/pci.h
> @@ -12,4 +12,10 @@
>  #define PCI_DVSEC_ID_CXL	0x0
>  #define PCI_DVSEC_ID_CXL_REGLOC	0x8
>  
> +#define CXL_REGLOG_RBI_EMPTY 0
> +#define CXL_REGLOG_RBI_COMPONENT 1
> +#define CXL_REGLOG_RBI_VIRT 2
> +#define CXL_REGLOG_RBI_MEMDEV 3

Maybe line these values up.

> +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV)

If these are only needed in cxl/mem.c, they could go there.  Do you
expect code outside of drivers/cxl to need these?

>  #endif /* __CXL_PCI_H__ */
> -- 
> 2.29.2
>
Ben Widawsky Nov. 14, 2020, 1:12 a.m. UTC | #2
On 20-11-13 12:17:32, Bjorn Helgaas wrote:
> On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote:
> > All the necessary bits are initialized in order to find and map the
> > register space for CXL Memory Devices. This is accomplished by using the
> > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to
> > use, and how much of an offset from that BAR should be added.
> 
> "Initialize the necessary bits ..." to use the usual imperative
> sentence structure, as you did in the subject.
> 
> > If the memory device registers are found and mapped a new internal data
> > structure tracking device state is allocated.
> 
> "Allocate device state if we find device registers" or similar.
> 
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > ---
> >  drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++----
> >  drivers/cxl/pci.h |  6 +++++
> >  2 files changed, 69 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index aa7d881fa47b..8d9b9ab6c5ea 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -7,9 +7,49 @@
> >  #include "pci.h"
> >  
> >  struct cxl_mem {
> > +	struct pci_dev *pdev;
> >  	void __iomem *regs;
> >  };
> >  
> > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi)
> > +{
> > +	struct device *dev = &pdev->dev;
> > +	struct cxl_mem *cxlm;
> > +	void __iomem *regs;
> > +	u64 offset;
> > +	u8 bar;
> > +	int rc;
> > +
> > +	offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000);
> > +	bar = reg_lo & 0x7;
> > +
> > +	/* Basic sanity check that BAR is big enough */
> > +	if (pci_resource_len(pdev, bar) < offset) {
> > +		dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n",
> > +				bar, &pdev->resource[bar], (unsigned long long) offset);
> 
> s/bar/BAR/
> 
> > +		return ERR_PTR(-ENXIO);
> > +	}
> > +
> > +	rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev));
> > +	if (rc != 0) {
> > +		dev_err(dev, "failed to map registers\n");
> > +		return ERR_PTR(-ENXIO);
> > +	}
> > +
> > +	cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
> > +	if (!cxlm) {
> > +		dev_err(dev, "No memory available\n");
> > +		return ERR_PTR(-ENOMEM);
> > +	}
> > +
> > +	regs = pcim_iomap_table(pdev)[bar];
> > +	cxlm->pdev = pdev;
> > +	cxlm->regs = regs + offset;
> > +
> > +	dev_dbg(dev, "Mapped CXL Memory Device resource\n");
> > +	return cxlm;
> > +}
> > +
> >  static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
> >  {
> >  	int pos;
> > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
> >  
> >  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >  {
> > +	struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
> >  	struct device *dev = &pdev->dev;
> > -	struct cxl_mem *cxlm;
> 
> The order was better before ("dev", then "cxlm").  Oh, I suppose this
> is a "reverse Christmas tree" thing.
> 

I don't actually care either way as long as it's consistent. I tend to do
reverse Christmas tree for no particular reason.

> > -	int rc, regloc;
> > +	int rc, regloc, i;
> >  
> >  	rc = cxl_bus_prepared(pdev);
> >  	if (rc != 0) {
> > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >  		return rc;
> >  	}
> >  
> > +	rc = pcim_enable_device(pdev);
> > +	if (rc)
> > +		return rc;
> > +
> >  	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC);
> >  	if (!regloc) {
> >  		dev_err(dev, "register location dvsec not found\n");
> >  		return -ENXIO;
> >  	}
> > +	regloc += 0xc; /* Skip DVSEC + reserved fields */
> > +
> > +	for (i = regloc; i < regloc + 0x24; i += 8) {
> > +		u32 reg_lo, reg_hi;
> > +
> > +		pci_read_config_dword(pdev, i, &reg_lo);
> > +		pci_read_config_dword(pdev, i + 4, &reg_hi);
> > +
> > +		if (CXL_REGLOG_IS_MEMDEV(reg_lo)) {
> > +			cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (IS_ERR(cxlm))
> > +		return -ENXIO;
> 
> I think this would be easier to read if cxl_mem_create() returned NULL
> on failure (it prints error messages and we throw away
> -ENXIO/-ENOMEM distinction here anyway) so you could do:
> 
>   struct cxl_mem *cxlm = NULL;
> 
>   for (...) {
>     if (...) {
>       cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
>       break;
>     }
>   }
> 
>   if (!cxlm)
>     return -ENXIO;  /* -ENODEV might be more natural? */
> 

I agree on both counts. Both of these came from Dan, so I will let him explain.

> > -	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
> > -	if (!cxlm)
> > -		return -ENOMEM;
> > +	pci_set_drvdata(pdev, cxlm);
> >  
> >  	return 0;
> >  }
> > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> > index beb03921e6da..be87f62e9132 100644
> > --- a/drivers/cxl/pci.h
> > +++ b/drivers/cxl/pci.h
> > @@ -12,4 +12,10 @@
> >  #define PCI_DVSEC_ID_CXL	0x0
> >  #define PCI_DVSEC_ID_CXL_REGLOC	0x8
> >  
> > +#define CXL_REGLOG_RBI_EMPTY 0
> > +#define CXL_REGLOG_RBI_COMPONENT 1
> > +#define CXL_REGLOG_RBI_VIRT 2
> > +#define CXL_REGLOG_RBI_MEMDEV 3
> 
> Maybe line these values up.
> 
> > +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV)
> 
> If these are only needed in cxl/mem.c, they could go there.  Do you
> expect code outside of drivers/cxl to need these?

Will do.

I'll suck in everything else as they seem like improvements.

> 
> >  #endif /* __CXL_PCI_H__ */
> > -- 
> > 2.29.2
> >
Dan Williams Nov. 16, 2020, 11:19 p.m. UTC | #3
On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 20-11-13 12:17:32, Bjorn Helgaas wrote:
> > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote:
> > > All the necessary bits are initialized in order to find and map the
> > > register space for CXL Memory Devices. This is accomplished by using the
> > > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to
> > > use, and how much of an offset from that BAR should be added.
> >
> > "Initialize the necessary bits ..." to use the usual imperative
> > sentence structure, as you did in the subject.
> >
> > > If the memory device registers are found and mapped a new internal data
> > > structure tracking device state is allocated.
> >
> > "Allocate device state if we find device registers" or similar.
> >
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > ---
> > >  drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++----
> > >  drivers/cxl/pci.h |  6 +++++
> > >  2 files changed, 69 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > > index aa7d881fa47b..8d9b9ab6c5ea 100644
> > > --- a/drivers/cxl/mem.c
> > > +++ b/drivers/cxl/mem.c
> > > @@ -7,9 +7,49 @@
> > >  #include "pci.h"
> > >
> > >  struct cxl_mem {
> > > +   struct pci_dev *pdev;
> > >     void __iomem *regs;
> > >  };
> > >
> > > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi)
> > > +{
> > > +   struct device *dev = &pdev->dev;
> > > +   struct cxl_mem *cxlm;
> > > +   void __iomem *regs;
> > > +   u64 offset;
> > > +   u8 bar;
> > > +   int rc;
> > > +
> > > +   offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000);
> > > +   bar = reg_lo & 0x7;
> > > +
> > > +   /* Basic sanity check that BAR is big enough */
> > > +   if (pci_resource_len(pdev, bar) < offset) {
> > > +           dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n",
> > > +                           bar, &pdev->resource[bar], (unsigned long long) offset);
> >
> > s/bar/BAR/
> >
> > > +           return ERR_PTR(-ENXIO);
> > > +   }
> > > +
> > > +   rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev));
> > > +   if (rc != 0) {
> > > +           dev_err(dev, "failed to map registers\n");
> > > +           return ERR_PTR(-ENXIO);
> > > +   }
> > > +
> > > +   cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
> > > +   if (!cxlm) {
> > > +           dev_err(dev, "No memory available\n");
> > > +           return ERR_PTR(-ENOMEM);
> > > +   }
> > > +
> > > +   regs = pcim_iomap_table(pdev)[bar];
> > > +   cxlm->pdev = pdev;
> > > +   cxlm->regs = regs + offset;
> > > +
> > > +   dev_dbg(dev, "Mapped CXL Memory Device resource\n");
> > > +   return cxlm;
> > > +}
> > > +
> > >  static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
> > >  {
> > >     int pos;
> > > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
> > >
> > >  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > >  {
> > > +   struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
> > >     struct device *dev = &pdev->dev;
> > > -   struct cxl_mem *cxlm;
> >
> > > The order was better before ("dev", then "cxlm").  Oh, I suppose this
> > is a "reverse Christmas tree" thing.
> >
>
> I don't actually care either way as long as it's consistent. I tend to do
> reverse Christmas tree for no particular reason.

Yeah, reverse Christmas tree for no particular reason.

>
> > > -   int rc, regloc;
> > > +   int rc, regloc, i;
> > >
> > >     rc = cxl_bus_prepared(pdev);
> > >     if (rc != 0) {
> > > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > >             return rc;
> > >     }
> > >
> > > +   rc = pcim_enable_device(pdev);
> > > +   if (rc)
> > > +           return rc;
> > > +
> > >     regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC);
> > >     if (!regloc) {
> > >             dev_err(dev, "register location dvsec not found\n");
> > >             return -ENXIO;
> > >     }
> > > +   regloc += 0xc; /* Skip DVSEC + reserved fields */
> > > +
> > > +   for (i = regloc; i < regloc + 0x24; i += 8) {
> > > +           u32 reg_lo, reg_hi;
> > > +
> > > +           pci_read_config_dword(pdev, i, &reg_lo);
> > > +           pci_read_config_dword(pdev, i + 4, &reg_hi);
> > > +
> > > +           if (CXL_REGLOG_IS_MEMDEV(reg_lo)) {
> > > +                   cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> > > +                   break;
> > > +           }
> > > +   }
> > > +
> > > +   if (IS_ERR(cxlm))
> > > +           return -ENXIO;
> >
> > I think this would be easier to read if cxl_mem_create() returned NULL
> > on failure (it prints error messages and we throw away
> > -ENXIO/-ENOMEM distinction here anyway) so you could do:
> >
> >   struct cxl_mem *cxlm = NULL;
> >
> >   for (...) {
> >     if (...) {
> >       cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> >       break;
> >     }
> >   }
> >
> >   if (!cxlm)
> >     return -ENXIO;  /* -ENODEV might be more natural? */
> >
>
> I agree on both counts. Both of these came from Dan, so I will let him explain.

I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to
drop the ERR_PTR() return. I do tend to use -ENXIO for failure to
perform an initialization action vs failure to even find the device,
but if -ENODEV seems more idiomatic to Bjorn, I won't argue.
Bjorn Helgaas Nov. 17, 2020, 12:23 a.m. UTC | #4
On Mon, Nov 16, 2020 at 03:19:41PM -0800, Dan Williams wrote:
> On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > On 20-11-13 12:17:32, Bjorn Helgaas wrote:
> > > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote:

> > > >  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > >  {
> > > > +   struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
> > > >     struct device *dev = &pdev->dev;
> > > > -   struct cxl_mem *cxlm;
> > >
> > > The order was better before ("dev", then "cxlm").  Oh, I suppose this
> > > is a "reverse Christmas tree" thing.
> > >
> >
> > I don't actually care either way as long as it's consistent. I tend to do
> > reverse Christmas tree for no particular reason.
> 
> Yeah, reverse Christmas tree for no particular reason.

FWIW, the usual drivers/pci style is to order the decls in the order
the variables are used in the code.  But this isn't drivers/pci, so
it's up to you.  I only noticed because changing the order made the
diff bigger than it needed to be.

> > > I think this would be easier to read if cxl_mem_create() returned NULL
> > > on failure (it prints error messages and we throw away
> > > -ENXIO/-ENOMEM distinction here anyway) so you could do:
> > >
> > >   struct cxl_mem *cxlm = NULL;
> > >
> > >   for (...) {
> > >     if (...) {
> > >       cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> > >       break;
> > >     }
> > >   }
> > >
> > >   if (!cxlm)
> > >     return -ENXIO;  /* -ENODEV might be more natural? */
> > >
> >
> > I agree on both counts. Both of these came from Dan, so I will let him explain.
> 
> I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to
> drop the ERR_PTR() return. I do tend to use -ENXIO for failure to
> perform an initialization action vs failure to even find the device,
> but if -ENODEV seems more idiomatic to Bjorn, I won't argue.

-ENXIO is fine with me.  I just don't see it as often so I don't
really know what it is.

Bjorn
Jonathan Cameron Nov. 17, 2020, 3 p.m. UTC | #5
On Tue, 10 Nov 2020 21:43:51 -0800
Ben Widawsky <ben.widawsky@intel.com> wrote:

> All the necessary bits are initialized in order to find and map the
> register space for CXL Memory Devices. This is accomplished by using the
> Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to
> use, and how much of an offset from that BAR should be added.
> 
> If the memory device registers are found and mapped a new internal data
> structure tracking device state is allocated.
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> ---
>  drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++----
>  drivers/cxl/pci.h |  6 +++++
>  2 files changed, 69 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index aa7d881fa47b..8d9b9ab6c5ea 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -7,9 +7,49 @@
>  #include "pci.h"
>  
>  struct cxl_mem {
> +	struct pci_dev *pdev;
>  	void __iomem *regs;
>  };
>  
> +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cxl_mem *cxlm;
> +	void __iomem *regs;
> +	u64 offset;
> +	u8 bar;
> +	int rc;
> +
> +	offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000);
> +	bar = reg_lo & 0x7;
> +
> +	/* Basic sanity check that BAR is big enough */
> +	if (pci_resource_len(pdev, bar) < offset) {
> +		dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n",
> +				bar, &pdev->resource[bar], (unsigned long long) offset);
> +		return ERR_PTR(-ENXIO);
> +	}
> +
> +	rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev));
> +	if (rc != 0) {
> +		dev_err(dev, "failed to map registers\n");
> +		return ERR_PTR(-ENXIO);
> +	}
> +
> +	cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
> +	if (!cxlm) {
> +		dev_err(dev, "No memory available\n");
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	regs = pcim_iomap_table(pdev)[bar];
> +	cxlm->pdev = pdev;
> +	cxlm->regs = regs + offset;
> +
> +	dev_dbg(dev, "Mapped CXL Memory Device resource\n");
> +	return cxlm;
> +}
> +
>  static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
>  {
>  	int pos;
> @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
>  
>  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
> +	struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
>  	struct device *dev = &pdev->dev;
> -	struct cxl_mem *cxlm;
> -	int rc, regloc;
> +	int rc, regloc, i;
>  
>  	rc = cxl_bus_prepared(pdev);
>  	if (rc != 0) {
> @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		return rc;
>  	}
>  
> +	rc = pcim_enable_device(pdev);
> +	if (rc)
> +		return rc;
> +
>  	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC);
>  	if (!regloc) {
>  		dev_err(dev, "register location dvsec not found\n");
>  		return -ENXIO;
>  	}
> +	regloc += 0xc; /* Skip DVSEC + reserved fields */
> +
> +	for (i = regloc; i < regloc + 0x24; i += 8) {
> +		u32 reg_lo, reg_hi;

Hmm. The "register offset low" naming in the spec is confusing, given that
lots of other things are packed into the same register.
Perhaps add a comment here to say it contains other information?
Also possibly some docs for cxl_mem_create() to make the same point there.

> +
> +		pci_read_config_dword(pdev, i, &reg_lo);
> +		pci_read_config_dword(pdev, i + 4, &reg_hi);
> +
> +		if (CXL_REGLOG_IS_MEMDEV(reg_lo)) {
> +			cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> +			break;
> +		}
> +	}
> +
> +	if (IS_ERR(cxlm))
> +		return -ENXIO;
>  
> -	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
> -	if (!cxlm)
> -		return -ENOMEM;
> +	pci_set_drvdata(pdev, cxlm);

I could be wrong, but I don't think this is used yet.  I'd prefer to see
it introduced only when it is.  That makes it easy to match up without
having to search back in earlier patches.

>  
>  	return 0;
>  }
> diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
> index beb03921e6da..be87f62e9132 100644
> --- a/drivers/cxl/pci.h
> +++ b/drivers/cxl/pci.h
> @@ -12,4 +12,10 @@
>  #define PCI_DVSEC_ID_CXL	0x0
>  #define PCI_DVSEC_ID_CXL_REGLOC	0x8
>  
> +#define CXL_REGLOG_RBI_EMPTY 0

As in the QEMU patches, please add a comment on what RBI means
here. It's non-obvious even just after you've read through the spec!

> +#define CXL_REGLOG_RBI_COMPONENT 1
> +#define CXL_REGLOG_RBI_VIRT 2
> +#define CXL_REGLOG_RBI_MEMDEV 3
> +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV)
> +
>  #endif /* __CXL_PCI_H__ */
Ben Widawsky Nov. 23, 2020, 7:20 p.m. UTC | #6
On 20-11-16 18:23:21, Bjorn Helgaas wrote:
> On Mon, Nov 16, 2020 at 03:19:41PM -0800, Dan Williams wrote:
> > On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > > On 20-11-13 12:17:32, Bjorn Helgaas wrote:
> > > > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote:
> 
> > > > >  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > > >  {
> > > > > +   struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
> > > > >     struct device *dev = &pdev->dev;
> > > > > -   struct cxl_mem *cxlm;
> > > >
> > > > The order was better before ("dev", then "cxlm").  Oh, I suppose this
> > > > is a "reverse Christmas tree" thing.
> > > >
> > >
> > > I don't actually care either way as long as it's consistent. I tend to do
> > > reverse Christmas tree for no particular reason.
> > 
> > Yeah, reverse Christmas tree for no particular reason.
> 
> FWIW, the usual drivers/pci style is to order the decls in the order
> the variables are used in the code.  But this isn't drivers/pci, so
> it's up to you.  I only noticed because changing the order made the
> diff bigger than it needed to be.
> 
> > > > I think this would be easier to read if cxl_mem_create() returned NULL
> > > > on failure (it prints error messages and we throw away
> > > > -ENXIO/-ENOMEM distinction here anyway) so you could do:
> > > >
> > > >   struct cxl_mem *cxlm = NULL;
> > > >
> > > >   for (...) {
> > > >     if (...) {
> > > >       cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
> > > >       break;
> > > >     }
> > > >   }
> > > >
> > > >   if (!cxlm)
> > > >     return -ENXIO;  /* -ENODEV might be more natural? */
> > > >
> > >
> > > I agree on both counts. Both of these came from Dan, so I will let him explain.
> > 
> > I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to
> > drop the ERR_PTR() return. I do tend to use -ENXIO for failure to
> > perform an initialization action vs failure to even find the device,
> > but if -ENODEV seems more idiomatic to Bjorn, I won't argue.
> 
> -ENXIO is fine with me.  I just don't see it as often so I don't
> really know what it is.
> 
> Bjorn

Dan, Bjorn, I took a fairly random look at various probe functions, and ENODEV
seems to be more common. My historical usage has been:
- ENODEV: General, couldn't establish device presence
- ENXIO: Device was there but something is totally misconfigured
- E*: A matching errno for exactly what went wrong

My question though is, would it be useful to propagate the error up through
probe?
Dan Williams Nov. 23, 2020, 7:32 p.m. UTC | #7
On Mon, Nov 23, 2020 at 11:20 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
[..]
> > -ENXIO is fine with me.  I just don't see it as often so I don't
> > really know what it is.
> >
> > Bjorn
>
> Dan, Bjorn, I did a fairly randomized look at various probe functions and ENODEV
> seems to be more common. My sort of historical use has been
> - ENODEV: General, couldn't establish device presence
> - ENXIO: Device was there but something is totally misconfigured
> - E*: A matching errno for exactly what went wrong
>
> My question though is, would it be useful to propagate the error up through
> probe?

The error from probe becomes the modprobe exit code, or the errno of the
write to the 'bind' attribute. So, it's a choice between "No such device
or address" and "No such device". The "or address" mention makes a
small bit more sense to me since the device is obviously present, as it
is visible in lspci, but either error code clearly indicates a driver
problem, so ENODEV is fine.

For the other error codes, I think it would be confusing to return
something like EINVAL from probe, as that would be mistaken for an
invalid argument to modprobe unless you strace it to see that it came
from the result of a sysfs write.
Ben Widawsky Nov. 23, 2020, 7:58 p.m. UTC | #8
On 20-11-23 11:32:33, Dan Williams wrote:
> On Mon, Nov 23, 2020 at 11:20 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
> [..]
> > > -ENXIO is fine with me.  I just don't see it as often so I don't
> > > really know what it is.
> > >
> > > Bjorn
> >
> > Dan, Bjorn, I did a fairly randomized look at various probe functions and ENODEV
> > seems to be more common. My sort of historical use has been
> > - ENODEV: General, couldn't establish device presence
> > - ENXIO: Device was there but something is totally misconfigured
> > - E*: A matching errno for exactly what went wrong
> >
> > My question though is, would it be useful to propagate the error up through
> > probe?
> 
> The error from probe becomes the modprobe exit code, or the write to
> the 'bind' attribute errno. So, it's a choice between "No such device
> or address", or "No such device". The "or address" mention makes a
> small bit more sense to me since the device is obviously present as it
> is visible in lspci, but either error code clearly indicates a driver
> problem so ENODEV is fine.
> 
> For the other error codes I think it would be confusing to return
> something like EINVAL from probe as that would be mistaken as an
> invalid argument to the modprobe without stracing to see that it came
> from the result of a sysfs write

Currently in this path there are 2 general reasons for failure:
1. Driver internal problem, ENOMEM or some such.
2. Device problem (the memory device capability isn't present).

I think I'll return ENODEV for the former and ENXIO for the latter, if that
sounds good to everyone else.
diff mbox series

Patch

diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index aa7d881fa47b..8d9b9ab6c5ea 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -7,9 +7,49 @@ 
 #include "pci.h"
 
 struct cxl_mem {
+	struct pci_dev *pdev;
 	void __iomem *regs;
 };
 
+static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi)
+{
+	struct device *dev = &pdev->dev;
+	struct cxl_mem *cxlm;
+	void __iomem *regs;
+	u64 offset;
+	u8 bar;
+	int rc;
+
+	offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000);
+	bar = reg_lo & 0x7;
+
+	/* Basic sanity check that BAR is big enough */
+	if (pci_resource_len(pdev, bar) < offset) {
+		dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n",
+				bar, &pdev->resource[bar], (unsigned long long) offset);
+		return ERR_PTR(-ENXIO);
+	}
+
+	rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev));
+	if (rc != 0) {
+		dev_err(dev, "failed to map registers\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
+	if (!cxlm) {
+		dev_err(dev, "No memory available\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	regs = pcim_iomap_table(pdev)[bar];
+	cxlm->pdev = pdev;
+	cxlm->regs = regs + offset;
+
+	dev_dbg(dev, "Mapped CXL Memory Device resource\n");
+	return cxlm;
+}
+
 static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
 {
 	int pos;
@@ -34,9 +74,9 @@  static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
 
 static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct cxl_mem *cxlm = ERR_PTR(-ENXIO);
 	struct device *dev = &pdev->dev;
-	struct cxl_mem *cxlm;
-	int rc, regloc;
+	int rc, regloc, i;
 
 	rc = cxl_bus_prepared(pdev);
 	if (rc != 0) {
@@ -44,15 +84,33 @@  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return rc;
 	}
 
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
 	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC);
 	if (!regloc) {
 		dev_err(dev, "register location dvsec not found\n");
 		return -ENXIO;
 	}
+	regloc += 0xc; /* Skip DVSEC + reserved fields */
+
+	for (i = regloc; i < regloc + 0x24; i += 8) {
+		u32 reg_lo, reg_hi;
+
+		pci_read_config_dword(pdev, i, &reg_lo);
+		pci_read_config_dword(pdev, i + 4, &reg_hi);
+
+		if (CXL_REGLOG_IS_MEMDEV(reg_lo)) {
+			cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
+			break;
+		}
+	}
+
+	if (IS_ERR(cxlm))
+		return -ENXIO;
 
-	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
-	if (!cxlm)
-		return -ENOMEM;
+	pci_set_drvdata(pdev, cxlm);
 
 	return 0;
 }
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
index beb03921e6da..be87f62e9132 100644
--- a/drivers/cxl/pci.h
+++ b/drivers/cxl/pci.h
@@ -12,4 +12,10 @@ 
 #define PCI_DVSEC_ID_CXL	0x0
 #define PCI_DVSEC_ID_CXL_REGLOC	0x8
 
+#define CXL_REGLOG_RBI_EMPTY 0
+#define CXL_REGLOG_RBI_COMPONENT 1
+#define CXL_REGLOG_RBI_VIRT 2
+#define CXL_REGLOG_RBI_MEMDEV 3
+#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV)
+
 #endif /* __CXL_PCI_H__ */