diff mbox series

[net-next,v3,1/3] ice: add ice_adapter for shared data across PFs on the same NIC

Message ID 20240307222510.53654-2-mschmidt@redhat.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series ice: lighter locking for PTP time reading | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 940 this patch: 940
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: pabeni@redhat.com anthony.l.nguyen@intel.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 957 this patch: 957
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 956 this patch: 956
netdev/checkpatch warning CHECK: spaces preferred around that '*' (ctx:WxV) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: suspect code indent for conditional statements (0, 0)
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Michal Schmidt March 7, 2024, 10:25 p.m. UTC
There is a need for synchronization between ice PFs on the same physical
adapter.

Add a "struct ice_adapter" for holding data shared between PFs of the
same multifunction PCI device. The struct is refcounted - each ice_pf
holds a reference to it.

Its first use will be for PTP. I expect it will be useful also to
improve the ugliness that is ice_prot_id_tbl.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
 drivers/net/ethernet/intel/ice/Makefile      |   3 +-
 drivers/net/ethernet/intel/ice/ice.h         |   2 +
 drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
 drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
 5 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h

Comments

Marcin Szycik March 8, 2024, 10:57 a.m. UTC | #1
On 07.03.2024 23:25, Michal Schmidt wrote:
> There is a need for synchronization between ice PFs on the same physical
> adapter.
> 
> Add a "struct ice_adapter" for holding data shared between PFs of the
> same multifunction PCI device. The struct is refcounted - each ice_pf
> holds a reference to it.
> 
> Its first use will be for PTP. I expect it will be useful also to
> improve the ugliness that is ice_prot_id_tbl.
> 
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> ---
>  drivers/net/ethernet/intel/ice/Makefile      |   3 +-
>  drivers/net/ethernet/intel/ice/ice.h         |   2 +
>  drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
>  drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
>  drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
>  5 files changed, 141 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
>  create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
> 
> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> index cddd82d4ca0f..4fa09c321440 100644
> --- a/drivers/net/ethernet/intel/ice/Makefile
> +++ b/drivers/net/ethernet/intel/ice/Makefile
> @@ -36,7 +36,8 @@ ice-y := ice_main.o	\
>  	 ice_repr.o	\
>  	 ice_tc_lib.o	\
>  	 ice_fwlog.o	\
> -	 ice_debugfs.o
> +	 ice_debugfs.o  \
> +	 ice_adapter.o
>  ice-$(CONFIG_PCI_IOV) +=	\
>  	ice_sriov.o		\
>  	ice_virtchnl.o		\
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index 365c03d1c462..1ffecbdd361a 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -77,6 +77,7 @@
>  #include "ice_gnss.h"
>  #include "ice_irq.h"
>  #include "ice_dpll.h"
> +#include "ice_adapter.h"
>  
>  #define ICE_BAR0		0
>  #define ICE_REQ_DESC_MULTIPLE	32
> @@ -544,6 +545,7 @@ struct ice_agg_node {
>  
>  struct ice_pf {
>  	struct pci_dev *pdev;
> +	struct ice_adapter *adapter;
>  
>  	struct devlink_region *nvm_region;
>  	struct devlink_region *sram_region;
> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> new file mode 100644
> index 000000000000..6b9eeba6edf7
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +// SPDX-FileCopyrightText: Copyright Red Hat
> +
> +#include <linux/cleanup.h>
> +#include <linux/mutex.h>
> +#include <linux/pci.h>
> +#include <linux/slab.h>
> +#include <linux/xarray.h>
> +#include "ice_adapter.h"
> +
> +static DEFINE_XARRAY(ice_adapters);
> +
> +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
> +{
> +	unsigned int domain = pci_domain_nr(pdev->bus);
> +
> +	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
> +	return ((unsigned long)domain << 13) |
> +	       ((unsigned long)pdev->bus->number << 5) |

Magic numbers?

> +	       PCI_SLOT(pdev->devfn);
> +}
> +
> +static struct ice_adapter *ice_adapter_new(void)
> +{
> +	struct ice_adapter *adapter;
> +
> +	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
> +	if (!adapter)
> +		return NULL;
> +
> +	refcount_set(&adapter->refcount, 1);
> +
> +	return adapter;
> +}
> +
> +static void ice_adapter_free(struct ice_adapter *adapter)
> +{
> +	kfree(adapter);
> +}
> +
> +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
> +
> +/**
> + * ice_adapter_get - Get a shared ice_adapter structure.
> + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
> + *
> + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
> + * of the same multi-function PCI device share one ice_adapter structure.
> + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
> + * to release its reference.
> + *
> + * Context: Process, may sleep.
> + * Return:  Pointer to ice_adapter on success.
> + *          ERR_PTR() on error. -ENOMEM is the only possible error.

What about ERR_PTR(xa_err(ret))?

> + */
> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> +{
> +	struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
> +	unsigned long index = ice_adapter_index(pdev);
> +
> +	adapter = ice_adapter_new();
> +	if (!adapter)
> +		return ERR_PTR(-ENOMEM);

---8<---

Thanks,
Marcin
Przemek Kitszel March 8, 2024, 12:17 p.m. UTC | #2
On 3/7/24 23:25, Michal Schmidt wrote:
> There is a need for synchronization between ice PFs on the same physical
> adapter.
> 
> Add a "struct ice_adapter" for holding data shared between PFs of the
> same multifunction PCI device. The struct is refcounted - each ice_pf
> holds a reference to it.
> 
> Its first use will be for PTP. I expect it will be useful also to
> improve the ugliness that is ice_prot_id_tbl.
> 
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>

Thank you very much for this series, we have spotted the need for
something like that very recently, I have already pinged our PTP folks
to take a look. (+CC Sergey)

Why not wipe ice_ptp_lock() entirely?
(could be left for Intel folks though)

please find the usual code related feedback inline
(again, I really appreciate and I am grateful for this series)

> ---
>   drivers/net/ethernet/intel/ice/Makefile      |   3 +-
>   drivers/net/ethernet/intel/ice/ice.h         |   2 +
>   drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
>   drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
>   5 files changed, 141 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
> 
> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> index cddd82d4ca0f..4fa09c321440 100644
> --- a/drivers/net/ethernet/intel/ice/Makefile
> +++ b/drivers/net/ethernet/intel/ice/Makefile
> @@ -36,7 +36,8 @@ ice-y := ice_main.o	\
>   	 ice_repr.o	\
>   	 ice_tc_lib.o	\
>   	 ice_fwlog.o	\
> -	 ice_debugfs.o
> +	 ice_debugfs.o  \
> +	 ice_adapter.o
>   ice-$(CONFIG_PCI_IOV) +=	\
>   	ice_sriov.o		\
>   	ice_virtchnl.o		\
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index 365c03d1c462..1ffecbdd361a 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -77,6 +77,7 @@
>   #include "ice_gnss.h"
>   #include "ice_irq.h"
>   #include "ice_dpll.h"
> +#include "ice_adapter.h"
>   
>   #define ICE_BAR0		0
>   #define ICE_REQ_DESC_MULTIPLE	32
> @@ -544,6 +545,7 @@ struct ice_agg_node {
>   
>   struct ice_pf {
>   	struct pci_dev *pdev;
> +	struct ice_adapter *adapter;
>   
>   	struct devlink_region *nvm_region;
>   	struct devlink_region *sram_region;
> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> new file mode 100644
> index 000000000000..6b9eeba6edf7
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +// SPDX-FileCopyrightText: Copyright Red Hat
> +
> +#include <linux/cleanup.h>
> +#include <linux/mutex.h>
> +#include <linux/pci.h>
> +#include <linux/slab.h>
> +#include <linux/xarray.h>
> +#include "ice_adapter.h"
> +
> +static DEFINE_XARRAY(ice_adapters);
> +
> +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
> +{
> +	unsigned int domain = pci_domain_nr(pdev->bus);
> +
> +	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
> +	return ((unsigned long)domain << 13) |
> +	       ((unsigned long)pdev->bus->number << 5) |
> +	       PCI_SLOT(pdev->devfn);

xarray is free to use non-0-based indices, so this whole function could
be simplified as:

/* note the PCI_SLOT() call to clear function from devfn */
return PCI_DEVID(pci_domain_nr(pdev->bus), PCI_SLOT(pdev->devfn));


> +}
> +
> +static struct ice_adapter *ice_adapter_new(void)
> +{
> +	struct ice_adapter *adapter;
> +
> +	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
> +	if (!adapter)
> +		return NULL;
> +
> +	refcount_set(&adapter->refcount, 1);
> +
> +	return adapter;
> +}
> +
> +static void ice_adapter_free(struct ice_adapter *adapter)
> +{
> +	kfree(adapter);
> +}

I would say that this is too thin wrapper for "kernel interface" (memory
ptr) to warrant it, IOW just place kfree in place of ice_adapter_free,
that will also free us from the need to use DEFINE_FREE()

> +
> +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
> +
> +/**
> + * ice_adapter_get - Get a shared ice_adapter structure.
> + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
> + *
> + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
> + * of the same multi-function PCI device share one ice_adapter structure.
> + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
> + * to release its reference.
> + *
> + * Context: Process, may sleep.
> + * Return:  Pointer to ice_adapter on success.
> + *          ERR_PTR() on error. -ENOMEM is the only possible error.

that's inconvenient, to the point that it will be better to have a dummy
static entry used for this purpose, but I see that this is something
broader that this particular use case, so - feel free to ignore

> + */
> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> +{
> +	struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
> +	unsigned long index = ice_adapter_index(pdev);
> +
> +	adapter = ice_adapter_new();
> +	if (!adapter)
> +		return ERR_PTR(-ENOMEM);
> +
> +	xa_lock(&ice_adapters);
> +	ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
> +	if (xa_is_err(ret)) {
> +		ret = ERR_PTR(xa_err(ret));
> +		goto unlock;
> +	}
> +	if (ret) {
> +		refcount_inc(&ret->refcount);
> +		goto unlock;
> +	}
> +	ret = no_free_ptr(adapter);

nice solution, but this is an idiom that we want across the kernel
instead of opting out of auto management in such cases as this one?
(esp that you have open-coded locking anyway)

I would expect to have explicit two stores (first to ensure index is
present, second to overwrite entry if null) easier than cmpxchg
+ unneeded allocation (that could cause whole function to fail!)


> +unlock:
> +	xa_unlock(&ice_adapters);
> +	return ret;
> +}
> +
> +/**
> + * ice_adapter_put - Release a reference to the shared ice_adapter structure.
> + * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter.
> + *
> + * Releases the reference to ice_adapter previously obtained with
> + * ice_adapter_get.
> + *
> + * Context: Any.
> + */
> +void ice_adapter_put(const struct pci_dev *pdev)
> +{
> +	unsigned long index = ice_adapter_index(pdev);
> +	struct ice_adapter *adapter;
> +
> +	xa_lock(&ice_adapters);
> +	adapter = xa_load(&ice_adapters, index);
> +	if (WARN_ON(!adapter))
> +		goto unlock;
> +
> +	if (!refcount_dec_and_test(&adapter->refcount))
> +		goto unlock;
> +
> +	WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
> +	ice_adapter_free(adapter);
> +unlock:
> +	xa_unlock(&ice_adapters);
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
> new file mode 100644
> index 000000000000..cb5a02eb24c1
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* SPDX-FileCopyrightText: Copyright Red Hat */
> +
> +#ifndef _ICE_ADAPTER_H_
> +#define _ICE_ADAPTER_H_
> +
> +#include <linux/refcount_types.h>
> +
> +struct pci_dev;
> +
> +/**
> + * struct ice_adapter - PCI adapter resources shared across PFs
> + * @refcount: Reference count. struct ice_pf objects hold the references.
> + */
> +struct ice_adapter {
> +	refcount_t refcount;

this is refcounted always under a lock, so could be plain "int",
but not a big deal

> +};
> +
> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
> +void ice_adapter_put(const struct pci_dev *pdev);
> +
> +#endif /* _ICE_ADAPTER_H */
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 8f73ba77e835..a3c545e56256 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -5093,6 +5093,7 @@ static int
>   ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>   {
>   	struct device *dev = &pdev->dev;
> +	struct ice_adapter *adapter;
>   	struct ice_pf *pf;
>   	struct ice_hw *hw;
>   	int err;
> @@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>   
>   	pci_set_master(pdev);
>   
> +	adapter = ice_adapter_get(pdev);
> +	if (IS_ERR(adapter))
> +		return PTR_ERR(adapter);
> +
>   	pf->pdev = pdev;
> +	pf->adapter = adapter;
>   	pci_set_drvdata(pdev, pf);
>   	set_bit(ICE_DOWN, pf->state);
>   	/* Disable service task until DOWN bit is cleared */
> @@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>   err_load:
>   	ice_deinit(pf);
>   err_init:
> +	ice_adapter_put(pdev);
>   	pci_disable_device(pdev);
>   	return err;
>   }
> @@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
>   	ice_setup_mc_magic_wake(pf);
>   	ice_set_wake(pf);
>   
> +	ice_adapter_put(pdev);
>   	pci_disable_device(pdev);
>   }
>
Michal Schmidt March 8, 2024, 1:35 p.m. UTC | #3
On Fri, Mar 8, 2024 at 11:57 AM Marcin Szycik
<marcin.szycik@linux.intel.com> wrote:
> On 07.03.2024 23:25, Michal Schmidt wrote:
> > There is a need for synchronization between ice PFs on the same physical
> > adapter.
> >
> > Add a "struct ice_adapter" for holding data shared between PFs of the
> > same multifunction PCI device. The struct is refcounted - each ice_pf
> > holds a reference to it.
> >
> > Its first use will be for PTP. I expect it will be useful also to
> > improve the ugliness that is ice_prot_id_tbl.
> >
> > Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> > ---
> >  drivers/net/ethernet/intel/ice/Makefile      |   3 +-
> >  drivers/net/ethernet/intel/ice/ice.h         |   2 +
> >  drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
> >  drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
> >  drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
> >  5 files changed, 141 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
> >  create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
> >
> > diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> > index cddd82d4ca0f..4fa09c321440 100644
> > --- a/drivers/net/ethernet/intel/ice/Makefile
> > +++ b/drivers/net/ethernet/intel/ice/Makefile
> > @@ -36,7 +36,8 @@ ice-y := ice_main.o \
> >        ice_repr.o     \
> >        ice_tc_lib.o   \
> >        ice_fwlog.o    \
> > -      ice_debugfs.o
> > +      ice_debugfs.o  \
> > +      ice_adapter.o
> >  ice-$(CONFIG_PCI_IOV) +=     \
> >       ice_sriov.o             \
> >       ice_virtchnl.o          \
> > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > index 365c03d1c462..1ffecbdd361a 100644
> > --- a/drivers/net/ethernet/intel/ice/ice.h
> > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > @@ -77,6 +77,7 @@
> >  #include "ice_gnss.h"
> >  #include "ice_irq.h"
> >  #include "ice_dpll.h"
> > +#include "ice_adapter.h"
> >
> >  #define ICE_BAR0             0
> >  #define ICE_REQ_DESC_MULTIPLE        32
> > @@ -544,6 +545,7 @@ struct ice_agg_node {
> >
> >  struct ice_pf {
> >       struct pci_dev *pdev;
> > +     struct ice_adapter *adapter;
> >
> >       struct devlink_region *nvm_region;
> >       struct devlink_region *sram_region;
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > new file mode 100644
> > index 000000000000..6b9eeba6edf7
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > @@ -0,0 +1,107 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +// SPDX-FileCopyrightText: Copyright Red Hat
> > +
> > +#include <linux/cleanup.h>
> > +#include <linux/mutex.h>
> > +#include <linux/pci.h>
> > +#include <linux/slab.h>
> > +#include <linux/xarray.h>
> > +#include "ice_adapter.h"
> > +
> > +static DEFINE_XARRAY(ice_adapters);
> > +
> > +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
> > +{
> > +     unsigned int domain = pci_domain_nr(pdev->bus);
> > +
> > +     WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
> > +     return ((unsigned long)domain << 13) |
> > +            ((unsigned long)pdev->bus->number << 5) |
>
> Magic numbers?

5 bits for the slot number, 8 bits for the bus number. 5+18=13.
I did not find any existing definitions for this purpose in pci.h, but
I can add some local macros.

> > +            PCI_SLOT(pdev->devfn);
> > +}
> > +
> > +static struct ice_adapter *ice_adapter_new(void)
> > +{
> > +     struct ice_adapter *adapter;
> > +
> > +     adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
> > +     if (!adapter)
> > +             return NULL;
> > +
> > +     refcount_set(&adapter->refcount, 1);
> > +
> > +     return adapter;
> > +}
> > +
> > +static void ice_adapter_free(struct ice_adapter *adapter)
> > +{
> > +     kfree(adapter);
> > +}
> > +
> > +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
> > +
> > +/**
> > + * ice_adapter_get - Get a shared ice_adapter structure.
> > + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
> > + *
> > + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
> > + * of the same multi-function PCI device share one ice_adapter structure.
> > + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
> > + * to release its reference.
> > + *
> > + * Context: Process, may sleep.
> > + * Return:  Pointer to ice_adapter on success.
> > + *          ERR_PTR() on error. -ENOMEM is the only possible error.
>
> What about ERR_PTR(xa_err(ret))?

The Xarray call can fail with -EINVAL or -ENOMEM. The -EINVAL would be
the result only if I'd attempt to insert an unaligned pointer, which
I'm not doing, so that leaves -ENOMEM as the only possible error.

> > + */
> > +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> > +{
> > +     struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
> > +     unsigned long index = ice_adapter_index(pdev);
> > +
> > +     adapter = ice_adapter_new();
> > +     if (!adapter)
> > +             return ERR_PTR(-ENOMEM);
>
> ---8<---
>
> Thanks,
> Marcin
>
Michal Schmidt March 8, 2024, 2:18 p.m. UTC | #4
On Fri, Mar 8, 2024 at 1:17 PM Przemek Kitszel
<przemyslaw.kitszel@intel.com> wrote:
> On 3/7/24 23:25, Michal Schmidt wrote:
> > There is a need for synchronization between ice PFs on the same physical
> > adapter.
> >
> > Add a "struct ice_adapter" for holding data shared between PFs of the
> > same multifunction PCI device. The struct is refcounted - each ice_pf
> > holds a reference to it.
> >
> > Its first use will be for PTP. I expect it will be useful also to
> > improve the ugliness that is ice_prot_id_tbl.
> >
> > Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
>
> Thank you very much for this series, we have spotted the need for
> something like that very recently, I have already pinged our PTP folks
> to take a look. (+CC Sergey)
>
> Why not wipe ice_ptp_lock() entirely?
> (could be left for Intel folks though)

I am doing this too, just not yet in this series I posted, because I
did not want to expand the scope of the series between reviews. See my
gitlab branch I linked in the cover letter, specifically this patch:
https://gitlab.com/mschmidt2/linux/-/commit/89a1bd2904ac8b0614bcfc2fce464bf5f60b0f0c

> please find the usual code related feedback inline
> (again, I really appreciate and I am grateful for this series)
>
> > ---
> >   drivers/net/ethernet/intel/ice/Makefile      |   3 +-
> >   drivers/net/ethernet/intel/ice/ice.h         |   2 +
> >   drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
> >   drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
> >   drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
> >   5 files changed, 141 insertions(+), 1 deletion(-)
> >   create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
> >   create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
> >
> > diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> > index cddd82d4ca0f..4fa09c321440 100644
> > --- a/drivers/net/ethernet/intel/ice/Makefile
> > +++ b/drivers/net/ethernet/intel/ice/Makefile
> > @@ -36,7 +36,8 @@ ice-y := ice_main.o \
> >        ice_repr.o     \
> >        ice_tc_lib.o   \
> >        ice_fwlog.o    \
> > -      ice_debugfs.o
> > +      ice_debugfs.o  \
> > +      ice_adapter.o
> >   ice-$(CONFIG_PCI_IOV) +=    \
> >       ice_sriov.o             \
> >       ice_virtchnl.o          \
> > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > index 365c03d1c462..1ffecbdd361a 100644
> > --- a/drivers/net/ethernet/intel/ice/ice.h
> > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > @@ -77,6 +77,7 @@
> >   #include "ice_gnss.h"
> >   #include "ice_irq.h"
> >   #include "ice_dpll.h"
> > +#include "ice_adapter.h"
> >
> >   #define ICE_BAR0            0
> >   #define ICE_REQ_DESC_MULTIPLE       32
> > @@ -544,6 +545,7 @@ struct ice_agg_node {
> >
> >   struct ice_pf {
> >       struct pci_dev *pdev;
> > +     struct ice_adapter *adapter;
> >
> >       struct devlink_region *nvm_region;
> >       struct devlink_region *sram_region;
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > new file mode 100644
> > index 000000000000..6b9eeba6edf7
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > @@ -0,0 +1,107 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +// SPDX-FileCopyrightText: Copyright Red Hat
> > +
> > +#include <linux/cleanup.h>
> > +#include <linux/mutex.h>
> > +#include <linux/pci.h>
> > +#include <linux/slab.h>
> > +#include <linux/xarray.h>
> > +#include "ice_adapter.h"
> > +
> > +static DEFINE_XARRAY(ice_adapters);
> > +
> > +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
> > +{
> > +     unsigned int domain = pci_domain_nr(pdev->bus);
> > +
> > +     WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
> > +     return ((unsigned long)domain << 13) |
> > +            ((unsigned long)pdev->bus->number << 5) |
> > +            PCI_SLOT(pdev->devfn);
>
> xarray is free to use non-0-based indices, so this whole function could
> be simplified as:
>
> /* note the PCI_SLOT() call to clear function from devfn */
> return PCI_DEVID(pci_domain_nr(pdev->bus), PCI_SLOT(pdev->devfn));

This is not equivalent. My function encodes three PCI numbers into the
index: domain, bus, slot.
Your version would have only: domain, slot. The bus number would be
lost. And also, higher-than-16 bits of the domain would be lost
unnecessarily.

> > +}
> > +
> > +static struct ice_adapter *ice_adapter_new(void)
> > +{
> > +     struct ice_adapter *adapter;
> > +
> > +     adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
> > +     if (!adapter)
> > +             return NULL;
> > +
> > +     refcount_set(&adapter->refcount, 1);
> > +
> > +     return adapter;
> > +}
> > +
> > +static void ice_adapter_free(struct ice_adapter *adapter)
> > +{
> > +     kfree(adapter);
> > +}
>
> I would say that this is too thin wrapper for "kernel interface" (memory
> ptr) to warrant it, IOW just place kfree in place of ice_adapter_free,
> that will also free us from the need to use DEFINE_FREE()

I am anticipating the need for struct members to destroy here. Eg. in
order to replace ice_ptp_lock entirely, I will add a mutex, which will
require mutex_destroy to be called in ice_adapter_free.

>
> > +
> > +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
> > +
> > +/**
> > + * ice_adapter_get - Get a shared ice_adapter structure.
> > + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
> > + *
> > + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
> > + * of the same multi-function PCI device share one ice_adapter structure.
> > + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
> > + * to release its reference.
> > + *
> > + * Context: Process, may sleep.
> > + * Return:  Pointer to ice_adapter on success.
> > + *          ERR_PTR() on error. -ENOMEM is the only possible error.
>
> that's inconvenient, to the point that it will be better to have a dummy
> static entry used for this purpose, but I see that this is something
> broader that this particular use case, so - feel free to ignore

Perhaps I don't get what you mean by a static entry. Maybe a static
singleton instance of struct ice_adapter? I don't want that, because I
can have multiple E810 NICs in the system and I don't want them to
share anything unnecessarily.

Besides, this allocates only a small amount of memory and the
allocation is GFP_KERNEL. It won't fail under any realistic scenario
(afaik, the "too small to fail" rule still holds in the kernel). This
is called only from ice_probe, which surely allocates much more
memory. I am not calling this every time I need to access the
ice_adapter. I'm keeping a pointer in ice_pf.

> > + */
> > +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> > +{
> > +     struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
> > +     unsigned long index = ice_adapter_index(pdev);
> > +
> > +     adapter = ice_adapter_new();
> > +     if (!adapter)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     xa_lock(&ice_adapters);
> > +     ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
> > +     if (xa_is_err(ret)) {
> > +             ret = ERR_PTR(xa_err(ret));
> > +             goto unlock;
> > +     }
> > +     if (ret) {
> > +             refcount_inc(&ret->refcount);
> > +             goto unlock;
> > +     }
> > +     ret = no_free_ptr(adapter);
>
> nice solution, but this is an idiom that we want across the kernel
> instead of opting out of auto management in such cases as this one?
> (esp that you have open-coded locking anyway)

I will follow up by changing the xa_lock usage to a guard if my
recently proposed patch ("xarray: add guard definitions for xa_lock")
gets accepted:
https://lore.kernel.org/lkml/20240228135352.14444-1-mschmidt@redhat.com/

> I would expect to have explicit two stores (first to ensure index is
> present, second to overwrite entry if null) easier than cmpxchg
> + unneeded allocation (that could cause whole function to fail!)

For reasons mentioned above, I am not worried about the allocation failing.
I am afraid moving away from the cmpxchg approach would force me to either:
 - Re-add the additional mutex I had in v1 of this series and that
Jiri Pirko asked me to remove and rely on xa_lock; or
 - Allocate ice_adapter under xa_lock, i.e. with GFP_ATOMIC. That
would only make running into ENOMEM more likely.

> > +unlock:
> > +     xa_unlock(&ice_adapters);
> > +     return ret;
> > +}
> > +
> > +/**
> > + * ice_adapter_put - Release a reference to the shared ice_adapter structure.
> > + * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter.
> > + *
> > + * Releases the reference to ice_adapter previously obtained with
> > + * ice_adapter_get.
> > + *
> > + * Context: Any.
> > + */
> > +void ice_adapter_put(const struct pci_dev *pdev)
> > +{
> > +     unsigned long index = ice_adapter_index(pdev);
> > +     struct ice_adapter *adapter;
> > +
> > +     xa_lock(&ice_adapters);
> > +     adapter = xa_load(&ice_adapters, index);
> > +     if (WARN_ON(!adapter))
> > +             goto unlock;
> > +
> > +     if (!refcount_dec_and_test(&adapter->refcount))
> > +             goto unlock;
> > +
> > +     WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
> > +     ice_adapter_free(adapter);
> > +unlock:
> > +     xa_unlock(&ice_adapters);
> > +}
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > new file mode 100644
> > index 000000000000..cb5a02eb24c1
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > @@ -0,0 +1,22 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/* SPDX-FileCopyrightText: Copyright Red Hat */
> > +
> > +#ifndef _ICE_ADAPTER_H_
> > +#define _ICE_ADAPTER_H_
> > +
> > +#include <linux/refcount_types.h>
> > +
> > +struct pci_dev;
> > +
> > +/**
> > + * struct ice_adapter - PCI adapter resources shared across PFs
> > + * @refcount: Reference count. struct ice_pf objects hold the references.
> > + */
> > +struct ice_adapter {
> > +     refcount_t refcount;
>
> this is refcounted always under a lock, so could be plain "int",
> but not a big deal

Yes, I know, but the over/under-flow checks provided by refcount_t
keep me warm and fuzzy :)

> > +};
> > +
> > +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
> > +void ice_adapter_put(const struct pci_dev *pdev);
> > +
> > +#endif /* _ICE_ADAPTER_H */
> > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > index 8f73ba77e835..a3c545e56256 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > @@ -5093,6 +5093,7 @@ static int
> >   ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> >   {
> >       struct device *dev = &pdev->dev;
> > +     struct ice_adapter *adapter;
> >       struct ice_pf *pf;
> >       struct ice_hw *hw;
> >       int err;
> > @@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> >
> >       pci_set_master(pdev);
> >
> > +     adapter = ice_adapter_get(pdev);
> > +     if (IS_ERR(adapter))
> > +             return PTR_ERR(adapter);
> > +
> >       pf->pdev = pdev;
> > +     pf->adapter = adapter;
> >       pci_set_drvdata(pdev, pf);
> >       set_bit(ICE_DOWN, pf->state);
> >       /* Disable service task until DOWN bit is cleared */
> > @@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> >   err_load:
> >       ice_deinit(pf);
> >   err_init:
> > +     ice_adapter_put(pdev);
> >       pci_disable_device(pdev);
> >       return err;
> >   }
> > @@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
> >       ice_setup_mc_magic_wake(pf);
> >       ice_set_wake(pf);
> >
> > +     ice_adapter_put(pdev);
> >       pci_disable_device(pdev);
> >   }
> >
>
Przemek Kitszel March 11, 2024, 11:05 a.m. UTC | #5
On 3/8/24 15:18, Michal Schmidt wrote:
> On Fri, Mar 8, 2024 at 1:17 PM Przemek Kitszel
> <przemyslaw.kitszel@intel.com> wrote:
>> On 3/7/24 23:25, Michal Schmidt wrote:
>>> There is a need for synchronization between ice PFs on the same physical
>>> adapter.
>>>
>>> Add a "struct ice_adapter" for holding data shared between PFs of the
>>> same multifunction PCI device. The struct is refcounted - each ice_pf
>>> holds a reference to it.
>>>
>>> Its first use will be for PTP. I expect it will be useful also to
>>> improve the ugliness that is ice_prot_id_tbl.
>>>
>>> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
>>
>> Thank you very much for this series, we have spotted the need for
>> something like that very recently, I have already pinged our PTP folks
>> to take a look. (+CC Sergey)
>>
>> Why not wipe ice_ptp_lock() entirely?
>> (could be left for Intel folks though)
> 
> I am doing this too, just not yet in this series I posted, because I
> did not want to expand the scope of the series between reviews. See my
> gitlab branch I linked in the cover letter, specifically this patch:
> https://gitlab.com/mschmidt2/linux/-/commit/89a1bd2904ac8b0614bcfc2fce464bf5f60b0f0c
> 
>> please find the usual code related feedback inline
>> (again, I really appreciate and I am grateful for this series)
>>
>>> ---
>>>    drivers/net/ethernet/intel/ice/Makefile      |   3 +-
>>>    drivers/net/ethernet/intel/ice/ice.h         |   2 +
>>>    drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
>>>    drivers/net/ethernet/intel/ice/ice_adapter.h |  22 ++++
>>>    drivers/net/ethernet/intel/ice/ice_main.c    |   8 ++
>>>    5 files changed, 141 insertions(+), 1 deletion(-)
>>>    create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
>>>    create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
>>>
>>> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
>>> index cddd82d4ca0f..4fa09c321440 100644
>>> --- a/drivers/net/ethernet/intel/ice/Makefile
>>> +++ b/drivers/net/ethernet/intel/ice/Makefile
>>> @@ -36,7 +36,8 @@ ice-y := ice_main.o \
>>>         ice_repr.o     \
>>>         ice_tc_lib.o   \
>>>         ice_fwlog.o    \
>>> -      ice_debugfs.o
>>> +      ice_debugfs.o  \
>>> +      ice_adapter.o
>>>    ice-$(CONFIG_PCI_IOV) +=    \
>>>        ice_sriov.o             \
>>>        ice_virtchnl.o          \
>>> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
>>> index 365c03d1c462..1ffecbdd361a 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice.h
>>> +++ b/drivers/net/ethernet/intel/ice/ice.h
>>> @@ -77,6 +77,7 @@
>>>    #include "ice_gnss.h"
>>>    #include "ice_irq.h"
>>>    #include "ice_dpll.h"
>>> +#include "ice_adapter.h"
>>>
>>>    #define ICE_BAR0            0
>>>    #define ICE_REQ_DESC_MULTIPLE       32
>>> @@ -544,6 +545,7 @@ struct ice_agg_node {
>>>
>>>    struct ice_pf {
>>>        struct pci_dev *pdev;
>>> +     struct ice_adapter *adapter;
>>>
>>>        struct devlink_region *nvm_region;
>>>        struct devlink_region *sram_region;
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
>>> new file mode 100644
>>> index 000000000000..6b9eeba6edf7
>>> --- /dev/null
>>> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
>>> @@ -0,0 +1,107 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +// SPDX-FileCopyrightText: Copyright Red Hat
>>> +
>>> +#include <linux/cleanup.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/xarray.h>
>>> +#include "ice_adapter.h"
>>> +
>>> +static DEFINE_XARRAY(ice_adapters);
>>> +
>>> +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
>>> +{
>>> +     unsigned int domain = pci_domain_nr(pdev->bus);
>>> +
>>> +     WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
>>> +     return ((unsigned long)domain << 13) |
>>> +            ((unsigned long)pdev->bus->number << 5) |
>>> +            PCI_SLOT(pdev->devfn);
>>
>> xarray is free to use non-0-based indices, so this whole function could
>> be simplified as:
>>
>> /* note the PCI_SLOT() call to clear function from devfn */
>> return PCI_DEVID(pci_domain_nr(pdev->bus), PCI_SLOT(pdev->devfn));
> 
> This is not equivalent. My function encodes three PCI numbers into the
> index: domain, bus, slot.
> Your version would have only: domain, slot. The bus number would be
> lost. And also, higher-than-16 bits of the domain would be lost
> unnecessarily.
> 
>>> +}
>>> +
>>> +static struct ice_adapter *ice_adapter_new(void)
>>> +{
>>> +     struct ice_adapter *adapter;
>>> +
>>> +     adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
>>> +     if (!adapter)
>>> +             return NULL;
>>> +
>>> +     refcount_set(&adapter->refcount, 1);
>>> +
>>> +     return adapter;
>>> +}
>>> +
>>> +static void ice_adapter_free(struct ice_adapter *adapter)
>>> +{
>>> +     kfree(adapter);
>>> +}
>>
>> I would say that this is too thin wrapper for "kernel interface" (memory
>> ptr) to warrant it, IOW just place kfree in place of ice_adapter_free,
>> that will also free us from the need to use DEFINE_FREE()
> 
> I am anticipating the need for struct members to destroy here. Eg. in
> order to replace ice_ptp_lock entirely, I will add a mutex, which will
> require mutex_destroy to be called in ice_adapter_free.
> 
>>
>>> +
>>> +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
>>> +
>>> +/**
>>> + * ice_adapter_get - Get a shared ice_adapter structure.
>>> + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
>>> + *
>>> + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
>>> + * of the same multi-function PCI device share one ice_adapter structure.
>>> + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
>>> + * to release its reference.
>>> + *
>>> + * Context: Process, may sleep.
>>> + * Return:  Pointer to ice_adapter on success.
>>> + *          ERR_PTR() on error. -ENOMEM is the only possible error.
>>
>> that's inconvenient, to the point that it will be better to have a dummy
>> static entry used for this purpose, but I see that this is something
>> broader that this particular use case, so - feel free to ignore
> 
> Perhaps I don't get what you mean by a static entry. Maybe a static
> singleton instance of struct ice_adapter? I don't want that, because I
> can have multiple E810 NICs in the system and I don't want them to
> share anything unnecessarily.
> 
> Besides, this allocates only a small amount of memory and the
> allocation is GFP_KERNEL. It won't fail under any realistic scenario
> (afaik, the "too small to fail" rule still holds in the kernel). This
> is called only from ice_probe, which surely allocates much more
> memory. I am not calling this every time I need to access the
> ice_adapter. I'm keeping a pointer in ice_pf.
> 
>>> + */
>>> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
>>> +{
>>> +     struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
>>> +     unsigned long index = ice_adapter_index(pdev);
>>> +
>>> +     adapter = ice_adapter_new();
>>> +     if (!adapter)
>>> +             return ERR_PTR(-ENOMEM);
>>> +
>>> +     xa_lock(&ice_adapters);
>>> +     ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
>>> +     if (xa_is_err(ret)) {
>>> +             ret = ERR_PTR(xa_err(ret));
>>> +             goto unlock;
>>> +     }
>>> +     if (ret) {
>>> +             refcount_inc(&ret->refcount);
>>> +             goto unlock;
>>> +     }
>>> +     ret = no_free_ptr(adapter);
>>
>> nice solution, but this is an idiom that we want across the kernel
>> instead of opting out of auto management in such cases as this one?
>> (esp that you have open-coded locking anyway)
> 
> I will follow up by changing the xa_lock usage to a guard if my
> recently proposed patch ("xarray: add guard definitions for xa_lock")
> gets accepted:
> https://lore.kernel.org/lkml/20240228135352.14444-1-mschmidt@redhat.com/
> 
>> I would expect to have explicit two stores (first to ensure index is
>> present, second to overwrite entry if null) easier than cmpxchg
>> + unneeded allocation (that could cause whole function to fail!)
> 
> For reasons mentioned above, I am not worried about the allocation failing.
> I am afraid moving away from the cmpxchg approach would force me to either:
>   - Re-add the additional mutex I had in v1 of this series and that
> Jiri Pirko asked me to remove and rely on xa_lock; or
>   - Allocate ice_adapter under xa_lock, i.e. with GFP_ATOMIC. That
> would only make running into ENOMEM more likely.

good point, thank for explanation :)


[...]
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index cddd82d4ca0f..4fa09c321440 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -36,7 +36,8 @@  ice-y := ice_main.o	\
 	 ice_repr.o	\
 	 ice_tc_lib.o	\
 	 ice_fwlog.o	\
-	 ice_debugfs.o
+	 ice_debugfs.o  \
+	 ice_adapter.o
 ice-$(CONFIG_PCI_IOV) +=	\
 	ice_sriov.o		\
 	ice_virtchnl.o		\
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 365c03d1c462..1ffecbdd361a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -77,6 +77,7 @@ 
 #include "ice_gnss.h"
 #include "ice_irq.h"
 #include "ice_dpll.h"
+#include "ice_adapter.h"
 
 #define ICE_BAR0		0
 #define ICE_REQ_DESC_MULTIPLE	32
@@ -544,6 +545,7 @@  struct ice_agg_node {
 
 struct ice_pf {
 	struct pci_dev *pdev;
+	struct ice_adapter *adapter;
 
 	struct devlink_region *nvm_region;
 	struct devlink_region *sram_region;
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
new file mode 100644
index 000000000000..6b9eeba6edf7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
@@ -0,0 +1,107 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright Red Hat
+
+#include <linux/cleanup.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include "ice_adapter.h"
+
+static DEFINE_XARRAY(ice_adapters);
+
+static unsigned long ice_adapter_index(const struct pci_dev *pdev)
+{
+	unsigned int domain = pci_domain_nr(pdev->bus);
+
+	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
+	return ((unsigned long)domain << 13) |
+	       ((unsigned long)pdev->bus->number << 5) |
+	       PCI_SLOT(pdev->devfn);
+}
+
+static struct ice_adapter *ice_adapter_new(void)
+{
+	struct ice_adapter *adapter;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return NULL;
+
+	refcount_set(&adapter->refcount, 1);
+
+	return adapter;
+}
+
+static void ice_adapter_free(struct ice_adapter *adapter)
+{
+	kfree(adapter);
+}
+
+DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
+
+/**
+ * ice_adapter_get - Get a shared ice_adapter structure.
+ * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
+ *
+ * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
+ * of the same multi-function PCI device share one ice_adapter structure.
+ * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
+ * to release its reference.
+ *
+ * Context: Process, may sleep.
+ * Return:  Pointer to ice_adapter on success.
+ *          ERR_PTR() on error. -ENOMEM is the only possible error.
+ */
+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
+{
+	struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
+	unsigned long index = ice_adapter_index(pdev);
+
+	adapter = ice_adapter_new();
+	if (!adapter)
+		return ERR_PTR(-ENOMEM);
+
+	xa_lock(&ice_adapters);
+	ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
+	if (xa_is_err(ret)) {
+		ret = ERR_PTR(xa_err(ret));
+		goto unlock;
+	}
+	if (ret) {
+		refcount_inc(&ret->refcount);
+		goto unlock;
+	}
+	ret = no_free_ptr(adapter);
+unlock:
+	xa_unlock(&ice_adapters);
+	return ret;
+}
+
+/**
+ * ice_adapter_put - Release a reference to the shared ice_adapter structure.
+ * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter.
+ *
+ * Releases the reference to ice_adapter previously obtained with
+ * ice_adapter_get.
+ *
+ * Context: Any.
+ */
+void ice_adapter_put(const struct pci_dev *pdev)
+{
+	unsigned long index = ice_adapter_index(pdev);
+	struct ice_adapter *adapter;
+
+	xa_lock(&ice_adapters);
+	adapter = xa_load(&ice_adapters, index);
+	if (WARN_ON(!adapter))
+		goto unlock;
+
+	if (!refcount_dec_and_test(&adapter->refcount))
+		goto unlock;
+
+	WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
+	ice_adapter_free(adapter);
+unlock:
+	xa_unlock(&ice_adapters);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
new file mode 100644
index 000000000000..cb5a02eb24c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: Copyright Red Hat */
+
+#ifndef _ICE_ADAPTER_H_
+#define _ICE_ADAPTER_H_
+
+#include <linux/refcount_types.h>
+
+struct pci_dev;
+
+/**
+ * struct ice_adapter - PCI adapter resources shared across PFs
+ * @refcount: Reference count. struct ice_pf objects hold the references.
+ */
+struct ice_adapter {
+	refcount_t refcount;
+};
+
+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
+void ice_adapter_put(const struct pci_dev *pdev);
+
+#endif /* _ICE_ADAPTER_H */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8f73ba77e835..a3c545e56256 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5093,6 +5093,7 @@  static int
 ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 {
 	struct device *dev = &pdev->dev;
+	struct ice_adapter *adapter;
 	struct ice_pf *pf;
 	struct ice_hw *hw;
 	int err;
@@ -5145,7 +5146,12 @@  ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	pci_set_master(pdev);
 
+	adapter = ice_adapter_get(pdev);
+	if (IS_ERR(adapter))
+		return PTR_ERR(adapter);
+
 	pf->pdev = pdev;
+	pf->adapter = adapter;
 	pci_set_drvdata(pdev, pf);
 	set_bit(ICE_DOWN, pf->state);
 	/* Disable service task until DOWN bit is cleared */
@@ -5196,6 +5202,7 @@  ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 err_load:
 	ice_deinit(pf);
 err_init:
+	ice_adapter_put(pdev);
 	pci_disable_device(pdev);
 	return err;
 }
@@ -5302,6 +5309,7 @@  static void ice_remove(struct pci_dev *pdev)
 	ice_setup_mc_magic_wake(pf);
 	ice_set_wake(pf);
 
+	ice_adapter_put(pdev);
 	pci_disable_device(pdev);
 }