diff mbox series

[iwl-next,v2] ice: Extend auxbus device naming

Message ID 20240423091459.72216-1-sergey.temerkhanov@intel.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [iwl-next,v2] ice: Extend auxbus device naming | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Sergey Temerkhanov April 23, 2024, 9:14 a.m. UTC
Include segment/domain number in the device name to distinguish
between PCI devices located on different root complexes in
multi-segment configurations. Naming is changed from
ptp_<bus>_<slot>_clk<clock>  to ptp_<domain>_<bus>_<slot>_clk<clock>

v1->v2
Rebase on top of the latest changes

Signed-off-by: Sergey Temerkhanov <sergey.temerkhanov@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Jiri Pirko April 23, 2024, 11:36 a.m. UTC | #1
Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com wrote:
>Include segment/domain number in the device name to distinguish
>between PCI devices located on different root complexes in
>multi-segment configurations. Naming is changed from
>ptp_<bus>_<slot>_clk<clock>  to ptp_<domain>_<bus>_<slot>_clk<clock>

I don't understand why you need to encode pci properties of a parent
device into the auxiliary bus name. Could you please explain the
motivation? Why you need a bus instance per PF?

The rest of the auxbus registrators don't do this. Could you please
align? Just have one bus for ice driver and that's it.


>
>v1->v2
>Rebase on top of the latest changes
>
>Signed-off-by: Sergey Temerkhanov <sergey.temerkhanov@intel.com>
>Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>---
> drivers/net/ethernet/intel/ice/ice_ptp.c | 18 ++++++++++++------
> 1 file changed, 12 insertions(+), 6 deletions(-)
>
>diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
>index 402436b72322..744b102f7636 100644
>--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
>+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
>@@ -2993,8 +2993,9 @@ ice_ptp_auxbus_create_id_table(struct ice_pf *pf, const char *name)
> static int ice_ptp_register_auxbus_driver(struct ice_pf *pf)
> {
> 	struct auxiliary_driver *aux_driver;
>+	struct pci_dev *pdev = pf->pdev;
> 	struct ice_ptp *ptp;
>-	char busdev[8] = {};
>+	char busdev[16] = {};
> 	struct device *dev;
> 	char *name;
> 	int err;
>@@ -3005,8 +3006,10 @@ static int ice_ptp_register_auxbus_driver(struct ice_pf *pf)
> 	INIT_LIST_HEAD(&ptp->ports_owner.ports);
> 	mutex_init(&ptp->ports_owner.lock);
> 	if (ice_is_e810(&pf->hw))
>-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
>-			PCI_SLOT(pf->pdev->devfn));
>+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
>+			 pci_domain_nr(pdev->bus),
>+			 pdev->bus->number,
>+			 PCI_SLOT(pdev->devfn));
> 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
> 			      ice_get_ptp_src_clock_index(&pf->hw));
> 	if (!name)
>@@ -3210,8 +3213,9 @@ static void ice_ptp_release_auxbus_device(struct device *dev)
> static int ice_ptp_create_auxbus_device(struct ice_pf *pf)
> {
> 	struct auxiliary_device *aux_dev;
>+	struct pci_dev *pdev = pf->pdev;
> 	struct ice_ptp *ptp;
>-	char busdev[8] = {};
>+	char busdev[16] = {};
> 	struct device *dev;
> 	char *name;
> 	int err;
>@@ -3224,8 +3228,10 @@ static int ice_ptp_create_auxbus_device(struct ice_pf *pf)
> 	aux_dev = &ptp->port.aux_dev;
> 
> 	if (ice_is_e810(&pf->hw))
>-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
>-			PCI_SLOT(pf->pdev->devfn));
>+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
>+			 pci_domain_nr(pdev->bus),
>+			 pdev->bus->number,
>+			 PCI_SLOT(pdev->devfn));
> 
> 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
> 			      ice_get_ptp_src_clock_index(&pf->hw));
>-- 
>2.35.3
>
>
Sergey Temerkhanov April 23, 2024, 11:56 a.m. UTC | #2
> -----Original Message-----
> From: Jiri Pirko <jiri@resnulli.us>
> Sent: Tuesday, April 23, 2024 1:36 PM
> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>
> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
> 
> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
> wrote:
> >Include segment/domain number in the device name to distinguish
> between
> >PCI devices located on different root complexes in multi-segment
> >configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
> >ptp_<domain>_<bus>_<slot>_clk<clock>
> 
> I don't understand why you need to encode pci properties of a parent device
> into the auxiliary bus name. Could you please explain the motivation? Why
> you need a bus instance per PF?
> 
> The rest of the auxbus registrators don't do this. Could you please align? Just
> have one bus for ice driver and that's it.

This patch adds support for multi-segment PCIe configurations.
An auxdev is created for each adapter, which has a clock, in the system. There can be
more than one adapter present, so there exists a possibility of device naming conflict.
To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.

Some systems may have adapters connected to different RCs which represent separate
PCI segments/domains. In such cases, BDF numbers  for these adapters can match, triggering
the naming conflict again. To avoid that, auxdev names are further extended to include the
segment/domain number.
  
> 
> 
> >
> >v1->v2
> >Rebase on top of the latest changes
> >
> >Signed-off-by: Sergey Temerkhanov <sergey.temerkhanov@intel.com>
> >Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> >---
> > drivers/net/ethernet/intel/ice/ice_ptp.c | 18 ++++++++++++------
> > 1 file changed, 12 insertions(+), 6 deletions(-)
> >
> >diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c
> >b/drivers/net/ethernet/intel/ice/ice_ptp.c
> >index 402436b72322..744b102f7636 100644
> >--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> >+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> >@@ -2993,8 +2993,9 @@ ice_ptp_auxbus_create_id_table(struct ice_pf
> *pf,
> >const char *name)  static int ice_ptp_register_auxbus_driver(struct
> >ice_pf *pf)  {
> > 	struct auxiliary_driver *aux_driver;
> >+	struct pci_dev *pdev = pf->pdev;
> > 	struct ice_ptp *ptp;
> >-	char busdev[8] = {};
> >+	char busdev[16] = {};
> > 	struct device *dev;
> > 	char *name;
> > 	int err;
> >@@ -3005,8 +3006,10 @@ static int ice_ptp_register_auxbus_driver(struct
> ice_pf *pf)
> > 	INIT_LIST_HEAD(&ptp->ports_owner.ports);
> > 	mutex_init(&ptp->ports_owner.lock);
> > 	if (ice_is_e810(&pf->hw))
> >-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
> >-			PCI_SLOT(pf->pdev->devfn));
> >+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
> >+			 pci_domain_nr(pdev->bus),
> >+			 pdev->bus->number,
> >+			 PCI_SLOT(pdev->devfn));
> > 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
> > 			      ice_get_ptp_src_clock_index(&pf->hw));
> > 	if (!name)
> >@@ -3210,8 +3213,9 @@ static void ice_ptp_release_auxbus_device(struct
> >device *dev)  static int ice_ptp_create_auxbus_device(struct ice_pf
> >*pf)  {
> > 	struct auxiliary_device *aux_dev;
> >+	struct pci_dev *pdev = pf->pdev;
> > 	struct ice_ptp *ptp;
> >-	char busdev[8] = {};
> >+	char busdev[16] = {};
> > 	struct device *dev;
> > 	char *name;
> > 	int err;
> >@@ -3224,8 +3228,10 @@ static int ice_ptp_create_auxbus_device(struct
> ice_pf *pf)
> > 	aux_dev = &ptp->port.aux_dev;
> >
> > 	if (ice_is_e810(&pf->hw))
> >-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
> >-			PCI_SLOT(pf->pdev->devfn));
> >+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
> >+			 pci_domain_nr(pdev->bus),
> >+			 pdev->bus->number,
> >+			 PCI_SLOT(pdev->devfn));
> >
> > 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
> > 			      ice_get_ptp_src_clock_index(&pf->hw));
> >--
> >2.35.3
> >
> >
Jiri Pirko April 23, 2024, 1:14 p.m. UTC | #3
Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>
>
>> -----Original Message-----
>> From: Jiri Pirko <jiri@resnulli.us>
>> Sent: Tuesday, April 23, 2024 1:36 PM
>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>> Przemyslaw <przemyslaw.kitszel@intel.com>
>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>> 
>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>> wrote:
>> >Include segment/domain number in the device name to distinguish
>> between
>> >PCI devices located on different root complexes in multi-segment
>> >configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>> >ptp_<domain>_<bus>_<slot>_clk<clock>
>> 
>> I don't understand why you need to encode pci properties of a parent device
>> into the auxiliary bus name. Could you please explain the motivation? Why
>> you need a bus instance per PF?
>> 
>> The rest of the auxbus registrators don't do this. Could you please align? Just
>> have one bus for ice driver and that's it.
>
>This patch adds support for multi-segment PCIe configurations.
>An auxdev is created for each adapter, which has a clock, in the system. There can be

You are trying to change auxiliary bus name.


>more than one adapter present, so there exists a possibility of device naming conflict.
>To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.

Why? It's the auxdev, the name should not contain anything related to
PCI, no reason for it. I asked for motivation, you didn't provide any.

Again, could you please avoid creating auxiliary bus per-PF and just
have one auxiliary but per-ice-driver?


>
>Some systems may have adapters connected to different RCs which represent separate
>PCI segments/domains. In such cases, BDF numbers  for these adapters can match, triggering
>the naming conflict again. To avoid that, auxdev names are further extended to include the
>segment/domain number.
>  
>> 
>> 
>> >
>> >v1->v2
>> >Rebase on top of the latest changes
>> >
>> >Signed-off-by: Sergey Temerkhanov <sergey.temerkhanov@intel.com>
>> >Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>> >---
>> > drivers/net/ethernet/intel/ice/ice_ptp.c | 18 ++++++++++++------
>> > 1 file changed, 12 insertions(+), 6 deletions(-)
>> >
>> >diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c
>> >b/drivers/net/ethernet/intel/ice/ice_ptp.c
>> >index 402436b72322..744b102f7636 100644
>> >--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
>> >+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
>> >@@ -2993,8 +2993,9 @@ ice_ptp_auxbus_create_id_table(struct ice_pf
>> *pf,
>> >const char *name)  static int ice_ptp_register_auxbus_driver(struct
>> >ice_pf *pf)  {
>> > 	struct auxiliary_driver *aux_driver;
>> >+	struct pci_dev *pdev = pf->pdev;
>> > 	struct ice_ptp *ptp;
>> >-	char busdev[8] = {};
>> >+	char busdev[16] = {};
>> > 	struct device *dev;
>> > 	char *name;
>> > 	int err;
>> >@@ -3005,8 +3006,10 @@ static int ice_ptp_register_auxbus_driver(struct
>> ice_pf *pf)
>> > 	INIT_LIST_HEAD(&ptp->ports_owner.ports);
>> > 	mutex_init(&ptp->ports_owner.lock);
>> > 	if (ice_is_e810(&pf->hw))
>> >-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
>> >-			PCI_SLOT(pf->pdev->devfn));
>> >+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
>> >+			 pci_domain_nr(pdev->bus),
>> >+			 pdev->bus->number,
>> >+			 PCI_SLOT(pdev->devfn));
>> > 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
>> > 			      ice_get_ptp_src_clock_index(&pf->hw));
>> > 	if (!name)
>> >@@ -3210,8 +3213,9 @@ static void ice_ptp_release_auxbus_device(struct
>> >device *dev)  static int ice_ptp_create_auxbus_device(struct ice_pf
>> >*pf)  {
>> > 	struct auxiliary_device *aux_dev;
>> >+	struct pci_dev *pdev = pf->pdev;
>> > 	struct ice_ptp *ptp;
>> >-	char busdev[8] = {};
>> >+	char busdev[16] = {};
>> > 	struct device *dev;
>> > 	char *name;
>> > 	int err;
>> >@@ -3224,8 +3228,10 @@ static int ice_ptp_create_auxbus_device(struct
>> ice_pf *pf)
>> > 	aux_dev = &ptp->port.aux_dev;
>> >
>> > 	if (ice_is_e810(&pf->hw))
>> >-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
>> >-			PCI_SLOT(pf->pdev->devfn));
>> >+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
>> >+			 pci_domain_nr(pdev->bus),
>> >+			 pdev->bus->number,
>> >+			 PCI_SLOT(pdev->devfn));
>> >
>> > 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
>> > 			      ice_get_ptp_src_clock_index(&pf->hw));
>> >--
>> >2.35.3
>> >
>> >
Jacob Keller April 23, 2024, 10:03 p.m. UTC | #4
On 4/23/2024 6:14 AM, Jiri Pirko wrote:
> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>
>>
>>> -----Original Message-----
>>> From: Jiri Pirko <jiri@resnulli.us>
>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>
>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>> wrote:
>>>> Include segment/domain number in the device name to distinguish
>>> between
>>>> PCI devices located on different root complexes in multi-segment
>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>
>>> I don't understand why you need to encode pci properties of a parent device
>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>> you need a bus instance per PF?
>>>
>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>> have one bus for ice driver and that's it.
>>
>> This patch adds support for multi-segment PCIe configurations.
>> An auxdev is created for each adapter, which has a clock, in the system. There can be
> 
> You are trying to change auxiliary bus name.
> 
> 
>> more than one adapter present, so there exists a possibility of device naming conflict.
>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
> 
> Why? It's the auxdev, the name should not contain anything related to
> PCI, no reason for it. I asked for motivation, you didn't provide any.
> 

We aren't creating one auxbus per PF. We're creating one auxbus per
*clock*. The device has multiple clocks in some configurations.

We need to connect each PF to the same clock controller, because there
is only one clock owner for the device in a multi-port card.

> Again, could you please avoid creating auxiliary bus per-PF and just
> have one auxiliary but per-ice-driver?
> 

We can't have one per-ice driver, because we don't want to connect ports
from a different device to the same clock. If you have two ice devices
plugged in, the ports on each device are separate from each other.

The goal here is to connect the clock ports to the clock owner.

Worse, as described here, we do have some devices which combine multiple
adapters together and tie their clocks together via HW signaling. In
those cases the clocks *do* need to communicate across the device, but
only to other ports on the same physical device, not to ports on a
different device.

Perhaps auxbus is the wrong approach here? but how else can we connect
these ports without over-connecting to other ports? We could write
bespoke code which finds these devices, but that felt like it was risky
and convoluted.

Perhaps it would be ideal if something in the PCI layer could connect
these together? I don't know how that would be implemented though..

The fundamental problem is that we have a multi-function device with
some shared functionality which we need to manage across function. In
this case it is the clock should only have one entity, while the ports
connected to it are controlled independently by PF. We tried a variety
of ways to solve this in the past, mostly with hacky solutions.

We need an entity which can find all the ports connected to a single
clock, and the port needs to be able to get back to its clock. If we
used a single auxdriver for this, that driver would have to maintain
some hash tables or connections in order to locate which ports belonged
to the clock. It would also need to figure out which port was the
"owner" so that it could send owner-based requests through that port,
since it would not inherently have access to the clock hardware since
its a global entity and not tied to a port.

In the current model, the driver can go back to the PF that spawned it
to manage the clock, and uses the aux devices as a mechanism to connect
to each port for purposes such as initializing the PHYs, and caching the
PTP hardware time for timestamp extension.

Maybe you disagree with this use of auxbus? Do you have any suggestions
for a separate model?

We could make use of ice_adapter, though we'd need some logic to manage
devices which have multiple clocks, as well as devices like the one
Sergey is working on which tie multiple adapters together.
Jiri Pirko April 24, 2024, 3:07 p.m. UTC | #5
Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>
>
>On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>
>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>> wrote:
>>>>> Include segment/domain number in the device name to distinguish
>>>> between
>>>>> PCI devices located on different root complexes in multi-segment
>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>
>>>> I don't understand why you need to encode pci properties of a parent device
>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>> you need a bus instance per PF?
>>>>
>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>> have one bus for ice driver and that's it.
>>>
>>> This patch adds support for multi-segment PCIe configurations.
>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>> 
>> You are trying to change auxiliary bus name.
>> 
>> 
>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>> 
>> Why? It's the auxdev, the name should not contain anything related to
>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>> 
>
>We aren't creating one auxbus per PF. We're creating one auxbus per
>*clock*. The device has multiple clocks in some configurations.

Does not matter. Why you need separate bus for whatever-instance? Why
can't you just have one ice-ptp bus and put devices on it?


>
>We need to connect each PF to the same clock controller, because there
>is only one clock owner for the device in a multi-port card.

Connect how? But putting a PF-device on a per-clock bus? That sounds
quite odd. How did you figure out this usage of auxiliary bus?


>
>> Again, could you please avoid creating auxiliary bus per-PF and just
>> have one auxiliary but per-ice-driver?
>> 
>
>We can't have one per-ice driver, because we don't want to connect ports
>from a different device to the same clock. If you have two ice devices
>plugged in, the ports on each device are separate from each other.
>
>The goal here is to connect the clock ports to the clock owner.
>
>Worse, as described here, we do have some devices which combine multiple
>adapters together and tie their clocks together via HW signaling. In
>those cases the clocks *do* need to communicate across the device, but
>only to other ports on the same physical device, not to ports on a
>different device.
>
>Perhaps auxbus is the wrong approach here? but how else can we connect

Yeah, feels quite wrong.


>these ports without over-connecting to other ports? We could write
>bespoke code which finds these devices, but that felt like it was risky
>and convoluted.

Sounds you need something you have for DPLL subsystem. Feels to me that
your hw design is quite disconnected from the Linux device model :/


>
>Perhaps it would be ideal if something in the PCI layer could connect
>these together? I don't know how that would be implemented though..
>
>The fundamental problem is that we have a multi-function device with
>some shared functionality which we need to manage across function. In
>this case it is the clock should only have one entity, while the ports
>connected to it are controlled independently by PF. We tried a variety
>of ways to solve this in the past, mostly with hacky solutions.
>
>We need an entity which can find all the ports connected to a single
>clock, and the port needs to be able to get back to its clock. If we
>used a single auxdriver for this, that driver would have to maintain
>some hash tables or connections in order to locate which ports belonged
>to the clock. It would also need to figure out which port was the
>"owner" so that it could send owner-based requests through that port,
>since it would not inherently have access to the clock hardware since
>its a global entity and not tied to a port.
>
>In the current model, the driver can go back to the PF that spawned it
>to manage the clock, and uses the aux devices as a mechanism to connect
>to each port for purposes such as initializing the PHYs, and caching the
>PTP hardware time for timestamp extension.
>
>Maybe you disagree with this use of auxbus? Do you have any suggestions
>for a separate model?
>
>We could make use of ice_adapter, though we'd need some logic to manage
>devices which have multiple clocks, as well as devices like the one
>Sergey is working on which tie multiple adapters together.

Perhaps that is an answer. Maybe we can make DPLL much more simple after
that :)
Jacob Keller April 24, 2024, 4:56 p.m. UTC | #6
On 4/24/2024 8:07 AM, Jiri Pirko wrote:
> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>
>>
>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>>
>>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>>> wrote:
>>>>>> Include segment/domain number in the device name to distinguish
>>>>> between
>>>>>> PCI devices located on different root complexes in multi-segment
>>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>>
>>>>> I don't understand why you need to encode pci properties of a parent device
>>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>>> you need a bus instance per PF?
>>>>>
>>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>>> have one bus for ice driver and that's it.
>>>>
>>>> This patch adds support for multi-segment PCIe configurations.
>>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>>>
>>> You are trying to change auxiliary bus name.
>>>
>>>
>>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>>>
>>> Why? It's the auxdev, the name should not contain anything related to
>>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>>>
>>
>> We aren't creating one auxbus per PF. We're creating one auxbus per
>> *clock*. The device has multiple clocks in some configurations.
> 
> Does not matter. Why you need separate bus for whatever-instance? Why
> can't you just have one ice-ptp bus and put devices on it?
> 
> 

Because we only want ports on card A to be connected to the card owner
on card A. We were using auxiliary bus to manage this. If we use a
single bus for ice-ptp, then we still have to implement separation
between each set of devices on a single card, which doesn't solve the
problems we had, and at least with the current code using auxiliary bus
doesn't buy us much if it doesn't solve that problem.

>>
>> We need to connect each PF to the same clock controller, because there
>> is only one clock owner for the device in a multi-port card.
> 
> Connect how? But putting a PF-device on a per-clock bus? That sounds
> quite odd. How did you figure out this usage of auxiliary bus?
> 
> 

Yea, its a multi-function board but the functions aren't fully
independent. Each port has its own PF, but multiple ports can be tied to
the same clock. We have similar problems with a variety of HW aspects.
I've been told that the design is simpler for other operating systems,
(something about the way the subsystems work so that they expect ports
to be tied to functions). But its definitely frustrating from Linux
perspective where a single driver instance for the device would be a lot
easier to manage.

>>
>>> Again, could you please avoid creating auxiliary bus per-PF and just
>>> have one auxiliary but per-ice-driver?
>>>
>>
>> We can't have one per-ice driver, because we don't want to connect ports
>>from a different device to the same clock. If you have two ice devices
>> plugged in, the ports on each device are separate from each other.
>>
>> The goal here is to connect the clock ports to the clock owner.
>>
>> Worse, as described here, we do have some devices which combine multiple
>> adapters together and tie their clocks together via HW signaling. In
>> those cases the clocks *do* need to communicate across the device, but
>> only to other ports on the same physical device, not to ports on a
>> different device.
>>
>> Perhaps auxbus is the wrong approach here? but how else can we connect
> 
> Yeah, feels quite wrong.
> 
> 
>> these ports without over-connecting to other ports? We could write
>> bespoke code which finds these devices, but that felt like it was risky
>> and convoluted.
> 
> Sounds you need something you have for DPLL subsystem. Feels to me that
> your hw design is quite disconnected from the Linux device model :/
> 

I'm not 100% sure how DPLL handles this. I'll have to investigate.

One thing I've considered a lot in the past (such as back when I was
working on devlink flash update) was to somehow have a sort of extra
layer where we could register with PCI subsystem some sort of "whole
device" driver, that would get registered first and could connect to the
rest of the function driver instances as they load. But this seems like
it would need a lot of work in the PCI layer, and apparently hasn't been
an issue for other devices? though ice is far from unique at least for
Intel NICs. Its just that the devices got significantly more complex and
functions more interdependent with this generation, and the issues with
global bits were solved in other ways: often via hiding them with
firmware >:(


I've tried explaining the issues with this to HW designers here, but so
far haven't gotten a lot of traction.

>> We could make use of ice_adapter, though we'd need some logic to manage
>> devices which have multiple clocks, as well as devices like the one
>> Sergey is working on which tie multiple adapters together.
> 
> Perhaps that is an answer. Maybe we can make DPLL much more simple after
> that :)

The only major issue with ice_adapter is the case where we have one
clock connected to multiple adapters, but hopefully Sergey has some
ideas for how to solve that.

I think we can mostly make the rest of the logic for the usual case work
via ice_adapter:

1) we already get an ice_adapter reference during early probe
2) each PF knows whether its an owner or not from the NVM/firmware interface
3) we can move the list of ports from the auxbus thing into ice_adapter,
possibly keeping one list per clock number, so that NVMs with multiple
clocks enabled don't have conflicts or put all ports onto the same clock.

I'm not sure how best to solve the weird case when we have multiple
adapters tied together, but perhaps something with extending how the
ice_adapter lookup is done could work? Sergey, I think we can continue
this design discussion off list and come up with a proposal.

We also have to be careful that whatever new solution also handles
things which we got with auxiliary bus:

1) prevent issues with ordering if a clock port loads before the clock
owner PF. If the clock owner loads first, then we need to ensure the
port still gets initialized. If the clock owner loads second, we need to
avoid issues with things not being setup yet, i.e. ensure all the
structures were already initialized (for example by initializing lists
and such when we create the ice_adapter, not when the clock owner loads).

2) prevent issues with teardown ordering that were previously serialized
by the auxiliary bus unregister bits, where the driver unregister
function would wait for all ports to shutdown.

I think this can be done correctly with ice_adapter, but I wanted to
point them out because we get them implicitly with the current design
with auxiliary bus.
Jiri Pirko April 26, 2024, 11:19 a.m. UTC | #7
Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>
>
>On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>>
>>>
>>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>>>
>>>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>>>> wrote:
>>>>>>> Include segment/domain number in the device name to distinguish
>>>>>> between
>>>>>>> PCI devices located on different root complexes in multi-segment
>>>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>>>
>>>>>> I don't understand why you need to encode pci properties of a parent device
>>>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>>>> you need a bus instance per PF?
>>>>>>
>>>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>>>> have one bus for ice driver and that's it.
>>>>>
>>>>> This patch adds support for multi-segment PCIe configurations.
>>>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>>>>
>>>> You are trying to change auxiliary bus name.
>>>>
>>>>
>>>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>>>>
>>>> Why? It's the auxdev, the name should not contain anything related to
>>>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>>>>
>>>
>>> We aren't creating one auxbus per PF. We're creating one auxbus per
>>> *clock*. The device has multiple clocks in some configurations.
>> 
>> Does not matter. Why you need separate bus for whatever-instance? Why
>> can't you just have one ice-ptp bus and put devices on it?
>> 
>> 
>
>Because we only want ports on card A to be connected to the card owner
>on card A. We were using auxiliary bus to manage this. If we use a

You do that by naming auxiliary bus according to the PCI device
created it, which feels obviously wrong.


>single bus for ice-ptp, then we still have to implement separation
>between each set of devices on a single card, which doesn't solve the
>problems we had, and at least with the current code using auxiliary bus
>doesn't buy us much if it doesn't solve that problem.

I don't think that auxiliary bus is the answer for your problem. Please
don't abuse it.

>
>>>
>>> We need to connect each PF to the same clock controller, because there
>>> is only one clock owner for the device in a multi-port card.
>> 
>> Connect how? But putting a PF-device on a per-clock bus? That sounds
>> quite odd. How did you figure out this usage of auxiliary bus?
>> 
>> 
>
>Yea, its a multi-function board but the functions aren't fully
>independent. Each port has its own PF, but multiple ports can be tied to
>the same clock. We have similar problems with a variety of HW aspects.
>I've been told that the design is simpler for other operating systems,
>(something about the way the subsystems work so that they expect ports
>to be tied to functions). But its definitely frustrating from Linux
>perspective where a single driver instance for the device would be a lot
>easier to manage.

You can always do it by internal accounting in ice, merge multiple PF
pci devices into one internal instance. Or checkout
drivers/base/component.c, perhaps it could be extended for your usecase.


>
>>>
>>>> Again, could you please avoid creating auxiliary bus per-PF and just
>>>> have one auxiliary but per-ice-driver?
>>>>
>>>
>>> We can't have one per-ice driver, because we don't want to connect ports
>>>from a different device to the same clock. If you have two ice devices
>>> plugged in, the ports on each device are separate from each other.
>>>
>>> The goal here is to connect the clock ports to the clock owner.
>>>
>>> Worse, as described here, we do have some devices which combine multiple
>>> adapters together and tie their clocks together via HW signaling. In
>>> those cases the clocks *do* need to communicate across the device, but
>>> only to other ports on the same physical device, not to ports on a
>>> different device.
>>>
>>> Perhaps auxbus is the wrong approach here? but how else can we connect
>> 
>> Yeah, feels quite wrong.
>> 
>> 
>>> these ports without over-connecting to other ports? We could write
>>> bespoke code which finds these devices, but that felt like it was risky
>>> and convoluted.
>> 
>> Sounds you need something you have for DPLL subsystem. Feels to me that
>> your hw design is quite disconnected from the Linux device model :/
>> 
>
>I'm not 100% sure how DPLL handles this. I'll have to investigate.

DPLL leaves the merging on DPLL level. The ice driver just register
entities multiple times. It is specifically designed to fit ice needs.


>
>One thing I've considered a lot in the past (such as back when I was
>working on devlink flash update) was to somehow have a sort of extra
>layer where we could register with PCI subsystem some sort of "whole
>device" driver, that would get registered first and could connect to the
>rest of the function driver instances as they load. But this seems like
>it would need a lot of work in the PCI layer, and apparently hasn't been
>an issue for other devices? though ice is far from unique at least for
>Intel NICs. Its just that the devices got significantly more complex and
>functions more interdependent with this generation, and the issues with
>global bits were solved in other ways: often via hiding them with
>firmware >:(

I think that others could benefit from such "merged device" as well. I
think it is about the time to introduce it.


>
>
>I've tried explaining the issues with this to HW designers here, but so
>far haven't gotten a lot of traction.
>
>>> We could make use of ice_adapter, though we'd need some logic to manage
>>> devices which have multiple clocks, as well as devices like the one
>>> Sergey is working on which tie multiple adapters together.
>> 
>> Perhaps that is an answer. Maybe we can make DPLL much more simple after
>> that :)
>
>The only major issue with ice_adapter is the case where we have one
>clock connected to multiple adapters, but hopefully Sergey has some
>ideas for how to solve that.
>
>I think we can mostly make the rest of the logic for the usual case work
>via ice_adapter:
>
>1) we already get an ice_adapter reference during early probe
>2) each PF knows whether its an owner or not from the NVM/firmware interface
>3) we can move the list of ports from the auxbus thing into ice_adapter,
>possibly keeping one list per clock number, so that NVMs with multiple
>clocks enabled don't have conflicts or put all ports onto the same clock.
>
>I'm not sure how best to solve the weird case when we have multiple
>adapters tied together, but perhaps something with extending how the
>ice_adapter lookup is done could work? Sergey, I think we can continue
>this design discussion off list and come up with a proposal.
>
>We also have to be careful that whatever new solution also handles
>things which we got with auxiliary bus:
>
>1) prevent issues with ordering if a clock port loads before the clock
>owner PF. If the clock owner loads first, then we need to ensure the
>port still gets initialized. If the clock owner loads second, we need to
>avoid issues with things not being setup yet, i.e. ensure all the
>structures were already initialized (for example by initializing lists
>and such when we create the ice_adapter, not when the clock owner loads).
>
>2) prevent issues with teardown ordering that were previously serialized
>by the auxiliary bus unregister bits, where the driver unregister
>function would wait for all ports to shutdown.
>
>I think this can be done correctly with ice_adapter, but I wanted to
>point them out because we get them implicitly with the current design
>with auxiliary bus.
Przemek Kitszel April 26, 2024, 12:49 p.m. UTC | #8
On 4/26/24 13:19, Jiri Pirko wrote:
> Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>>
>>
>> On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>>> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>>>
>>>>
>>>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>>>>
>>>>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>>>>> wrote:
>>>>>>>> Include segment/domain number in the device name to distinguish
>>>>>>> between
>>>>>>>> PCI devices located on different root complexes in multi-segment
>>>>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>>>>
>>>>>>> I don't understand why you need to encode pci properties of a parent device
>>>>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>>>>> you need a bus instance per PF?
>>>>>>>
>>>>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>>>>> have one bus for ice driver and that's it.
>>>>>>
>>>>>> This patch adds support for multi-segment PCIe configurations.
>>>>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>>>>>
>>>>> You are trying to change auxiliary bus name.
>>>>>
>>>>>
>>>>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>>>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>>>>>
>>>>> Why? It's the auxdev, the name should not contain anything related to
>>>>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>>>>>
>>>>
>>>> We aren't creating one auxbus per PF. We're creating one auxbus per
>>>> *clock*. The device has multiple clocks in some configurations.
>>>
>>> Does not matter. Why you need separate bus for whatever-instance? Why
>>> can't you just have one ice-ptp bus and put devices on it?
>>>
>>>
>>
>> Because we only want ports on card A to be connected to the card owner
>> on card A. We were using auxiliary bus to manage this. If we use a
> 
> You do that by naming auxiliary bus according to the PCI device
> created it, which feels obviously wrong.
> 
> 
>> single bus for ice-ptp, then we still have to implement separation
>> between each set of devices on a single card, which doesn't solve the
>> problems we had, and at least with the current code using auxiliary bus
>> doesn't buy us much if it doesn't solve that problem.
> 
> I don't think that auxiliary bus is the answer for your problem. Please
> don't abuse it.
> 
>>
>>>>
>>>> We need to connect each PF to the same clock controller, because there
>>>> is only one clock owner for the device in a multi-port card.
>>>
>>> Connect how? But putting a PF-device on a per-clock bus? That sounds
>>> quite odd. How did you figure out this usage of auxiliary bus?
>>>
>>>
>>
>> Yea, its a multi-function board but the functions aren't fully
>> independent. Each port has its own PF, but multiple ports can be tied to
>> the same clock. We have similar problems with a variety of HW aspects.
>> I've been told that the design is simpler for other operating systems,
>> (something about the way the subsystems work so that they expect ports
>> to be tied to functions). But its definitely frustrating from Linux
>> perspective where a single driver instance for the device would be a lot
>> easier to manage.
> 
> You can always do it by internal accounting in ice, merge multiple PF
> pci devices into one internal instance. Or checkout
> drivers/base/component.c, perhaps it could be extended for your usecase.
> 
> 
>>
>>>>
>>>>> Again, could you please avoid creating auxiliary bus per-PF and just
>>>>> have one auxiliary but per-ice-driver?
>>>>>
>>>>
>>>> We can't have one per-ice driver, because we don't want to connect ports
>>> >from a different device to the same clock. If you have two ice devices
>>>> plugged in, the ports on each device are separate from each other.
>>>>
>>>> The goal here is to connect the clock ports to the clock owner.
>>>>
>>>> Worse, as described here, we do have some devices which combine multiple
>>>> adapters together and tie their clocks together via HW signaling. In
>>>> those cases the clocks *do* need to communicate across the device, but
>>>> only to other ports on the same physical device, not to ports on a
>>>> different device.
>>>>
>>>> Perhaps auxbus is the wrong approach here? but how else can we connect
>>>
>>> Yeah, feels quite wrong.
>>>
>>>
>>>> these ports without over-connecting to other ports? We could write
>>>> bespoke code which finds these devices, but that felt like it was risky
>>>> and convoluted.
>>>
>>> Sounds you need something you have for DPLL subsystem. Feels to me that
>>> your hw design is quite disconnected from the Linux device model :/
>>>
>>
>> I'm not 100% sure how DPLL handles this. I'll have to investigate.
> 
> DPLL leaves the merging on DPLL level. The ice driver just register
> entities multiple times. It is specifically designed to fit ice needs.
> 
> 
>>
>> One thing I've considered a lot in the past (such as back when I was
>> working on devlink flash update) was to somehow have a sort of extra
>> layer where we could register with PCI subsystem some sort of "whole
>> device" driver, that would get registered first and could connect to the
>> rest of the function driver instances as they load. But this seems like
>> it would need a lot of work in the PCI layer, and apparently hasn't been
>> an issue for other devices? though ice is far from unique at least for
>> Intel NICs. Its just that the devices got significantly more complex and
>> functions more interdependent with this generation, and the issues with
>> global bits were solved in other ways: often via hiding them with
>> firmware >:(
> 
> I think that others could benefit from such "merged device" as well. I
> think it is about the time to introduce it.

so far I see that we want to merge based on different bits, but let's
see what will come from exploratory work that Sergey is doing right now.

and anything that is not a device/driver feels much more lightweight to
operate with (think &ice_adapter, but extended with more fields).
Could you elaborate more on what you have in mind? (Or what you could
imagine reusing).

offtop:
It will be a good hook to put resources that are shared across ports
under it in devlink resources, so making this "merged device" an entity
would enable higher layer over what we have with devlink xxx $pf.

> 
> 
>>
>>
>> I've tried explaining the issues with this to HW designers here, but so
>> far haven't gotten a lot of traction.
>>
>>>> We could make use of ice_adapter, though we'd need some logic to manage
>>>> devices which have multiple clocks, as well as devices like the one
>>>> Sergey is working on which tie multiple adapters together.
>>>
>>> Perhaps that is an answer. Maybe we can make DPLL much more simple after
>>> that :)
>>
>> The only major issue with ice_adapter is the case where we have one
>> clock connected to multiple adapters, but hopefully Sergey has some
>> ideas for how to solve that.
>>
>> I think we can mostly make the rest of the logic for the usual case work
>> via ice_adapter:
>>
>> 1) we already get an ice_adapter reference during early probe
>> 2) each PF knows whether its an owner or not from the NVM/firmware interface
>> 3) we can move the list of ports from the auxbus thing into ice_adapter,
>> possibly keeping one list per clock number, so that NVMs with multiple
>> clocks enabled don't have conflicts or put all ports onto the same clock.
>>
>> I'm not sure how best to solve the weird case when we have multiple
>> adapters tied together, but perhaps something with extending how the
>> ice_adapter lookup is done could work? Sergey, I think we can continue
>> this design discussion off list and come up with a proposal.
>>
>> We also have to be careful that whatever new solution also handles
>> things which we got with auxiliary bus:
>>
>> 1) prevent issues with ordering if a clock port loads before the clock
>> owner PF. If the clock owner loads first, then we need to ensure the
>> port still gets initialized. If the clock owner loads second, we need to
>> avoid issues with things not being setup yet, i.e. ensure all the
>> structures were already initialized (for example by initializing lists
>> and such when we create the ice_adapter, not when the clock owner loads).
>>
>> 2) prevent issues with teardown ordering that were previously serialized
>> by the auxiliary bus unregister bits, where the driver unregister
>> function would wait for all ports to shutdown.
>>
>> I think this can be done correctly with ice_adapter, but I wanted to
>> point them out because we get them implicitly with the current design
>> with auxiliary bus.
Jiri Pirko April 26, 2024, 1:43 p.m. UTC | #9
Fri, Apr 26, 2024 at 02:49:40PM CEST, przemyslaw.kitszel@intel.com wrote:
>On 4/26/24 13:19, Jiri Pirko wrote:
>> Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>> > 
>> > 
>> > On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>> > > Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>> > > > 
>> > > > 
>> > > > On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>> > > > > Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>> > > > > > 
>> > > > > > 
>> > > > > > > -----Original Message-----
>> > > > > > > From: Jiri Pirko <jiri@resnulli.us>
>> > > > > > > Sent: Tuesday, April 23, 2024 1:36 PM
>> > > > > > > To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>> > > > > > > Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>> > > > > > > Przemyslaw <przemyslaw.kitszel@intel.com>
>> > > > > > > Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>> > > > > > > 
>> > > > > > > Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>> > > > > > > wrote:
>> > > > > > > > Include segment/domain number in the device name to distinguish
>> > > > > > > between
>> > > > > > > > PCI devices located on different root complexes in multi-segment
>> > > > > > > > configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>> > > > > > > > ptp_<domain>_<bus>_<slot>_clk<clock>
>> > > > > > > 
>> > > > > > > I don't understand why you need to encode pci properties of a parent device
>> > > > > > > into the auxiliary bus name. Could you please explain the motivation? Why
>> > > > > > > you need a bus instance per PF?
>> > > > > > > 
>> > > > > > > The rest of the auxbus registrators don't do this. Could you please align? Just
>> > > > > > > have one bus for ice driver and that's it.
>> > > > > > 
>> > > > > > This patch adds support for multi-segment PCIe configurations.
>> > > > > > An auxdev is created for each adapter, which has a clock, in the system. There can be
>> > > > > 
>> > > > > You are trying to change auxiliary bus name.
>> > > > > 
>> > > > > 
>> > > > > > more than one adapter present, so there exists a possibility of device naming conflict.
>> > > > > > To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>> > > > > 
>> > > > > Why? It's the auxdev, the name should not contain anything related to
>> > > > > PCI, no reason for it. I asked for motivation, you didn't provide any.
>> > > > > 
>> > > > 
>> > > > We aren't creating one auxbus per PF. We're creating one auxbus per
>> > > > *clock*. The device has multiple clocks in some configurations.
>> > > 
>> > > Does not matter. Why you need separate bus for whatever-instance? Why
>> > > can't you just have one ice-ptp bus and put devices on it?
>> > > 
>> > > 
>> > 
>> > Because we only want ports on card A to be connected to the card owner
>> > on card A. We were using auxiliary bus to manage this. If we use a
>> 
>> You do that by naming auxiliary bus according to the PCI device
>> created it, which feels obviously wrong.
>> 
>> 
>> > single bus for ice-ptp, then we still have to implement separation
>> > between each set of devices on a single card, which doesn't solve the
>> > problems we had, and at least with the current code using auxiliary bus
>> > doesn't buy us much if it doesn't solve that problem.
>> 
>> I don't think that auxiliary bus is the answer for your problem. Please
>> don't abuse it.
>> 
>> > 
>> > > > 
>> > > > We need to connect each PF to the same clock controller, because there
>> > > > is only one clock owner for the device in a multi-port card.
>> > > 
>> > > Connect how? But putting a PF-device on a per-clock bus? That sounds
>> > > quite odd. How did you figure out this usage of auxiliary bus?
>> > > 
>> > > 
>> > 
>> > Yea, its a multi-function board but the functions aren't fully
>> > independent. Each port has its own PF, but multiple ports can be tied to
>> > the same clock. We have similar problems with a variety of HW aspects.
>> > I've been told that the design is simpler for other operating systems,
>> > (something about the way the subsystems work so that they expect ports
>> > to be tied to functions). But its definitely frustrating from Linux
>> > perspective where a single driver instance for the device would be a lot
>> > easier to manage.
>> 
>> You can always do it by internal accounting in ice, merge multiple PF
>> pci devices into one internal instance. Or checkout
>> drivers/base/component.c, perhaps it could be extended for your usecase.
>> 
>> 
>> > 
>> > > > 
>> > > > > Again, could you please avoid creating auxiliary bus per-PF and just
>> > > > > have one auxiliary but per-ice-driver?
>> > > > > 
>> > > > 
>> > > > We can't have one per-ice driver, because we don't want to connect ports
>> > > >from a different device to the same clock. If you have two ice devices
>> > > > plugged in, the ports on each device are separate from each other.
>> > > > 
>> > > > The goal here is to connect the clock ports to the clock owner.
>> > > > 
>> > > > Worse, as described here, we do have some devices which combine multiple
>> > > > adapters together and tie their clocks together via HW signaling. In
>> > > > those cases the clocks *do* need to communicate across the device, but
>> > > > only to other ports on the same physical device, not to ports on a
>> > > > different device.
>> > > > 
>> > > > Perhaps auxbus is the wrong approach here? but how else can we connect
>> > > 
>> > > Yeah, feels quite wrong.
>> > > 
>> > > 
>> > > > these ports without over-connecting to other ports? We could write
>> > > > bespoke code which finds these devices, but that felt like it was risky
>> > > > and convoluted.
>> > > 
>> > > Sounds you need something you have for DPLL subsystem. Feels to me that
>> > > your hw design is quite disconnected from the Linux device model :/
>> > > 
>> > 
>> > I'm not 100% sure how DPLL handles this. I'll have to investigate.
>> 
>> DPLL leaves the merging on DPLL level. The ice driver just register
>> entities multiple times. It is specifically designed to fit ice needs.
>> 
>> 
>> > 
>> > One thing I've considered a lot in the past (such as back when I was
>> > working on devlink flash update) was to somehow have a sort of extra
>> > layer where we could register with PCI subsystem some sort of "whole
>> > device" driver, that would get registered first and could connect to the
>> > rest of the function driver instances as they load. But this seems like
>> > it would need a lot of work in the PCI layer, and apparently hasn't been
>> > an issue for other devices? though ice is far from unique at least for
>> > Intel NICs. Its just that the devices got significantly more complex and
>> > functions more interdependent with this generation, and the issues with
>> > global bits were solved in other ways: often via hiding them with
>> > firmware >:(
>> 
>> I think that others could benefit from such "merged device" as well. I
>> think it is about the time to introduce it.
>
>so far I see that we want to merge based on different bits, but let's
>see what will come from exploratory work that Sergey is doing right now.
>
>and anything that is not a device/driver feels much more lightweight to
>operate with (think &ice_adapter, but extended with more fields).
>Could you elaborate more on what you have in mind? (Or what you could
>imagine reusing).

Nothing concrete really. See below.

>
>offtop:
>It will be a good hook to put resources that are shared across ports
>under it in devlink resources, so making this "merged device" an entity
>would enable higher layer over what we have with devlink xxx $pf.

Yes. That would be great. I think we need a "device" in a sense of
struct device instance. Then you can properly model the relationships in
sysfs, you can have devlink for that, etc.

drivers/base/component.c does merging of multiple devices, but it does
not create a "merged device", this is missing there. So we have 2
options:

1) extend drivers/base/component.c to allow to create a merged device
   entity
2) implement merged device infrastructure separatelly.

IDK. I believe we need to rope more people in.


>
>> 
>> 
>> > 
>> > 
>> > I've tried explaining the issues with this to HW designers here, but so
>> > far haven't gotten a lot of traction.
>> > 
>> > > > We could make use of ice_adapter, though we'd need some logic to manage
>> > > > devices which have multiple clocks, as well as devices like the one
>> > > > Sergey is working on which tie multiple adapters together.
>> > > 
>> > > Perhaps that is an answer. Maybe we can make DPLL much more simple after
>> > > that :)
>> > 
>> > The only major issue with ice_adapter is the case where we have one
>> > clock connected to multiple adapters, but hopefully Sergey has some
>> > ideas for how to solve that.
>> > 
>> > I think we can mostly make the rest of the logic for the usual case work
>> > via ice_adapter:
>> > 
>> > 1) we already get an ice_adapter reference during early probe
>> > 2) each PF knows whether its an owner or not from the NVM/firmware interface
>> > 3) we can move the list of ports from the auxbus thing into ice_adapter,
>> > possibly keeping one list per clock number, so that NVMs with multiple
>> > clocks enabled don't have conflicts or put all ports onto the same clock.
>> > 
>> > I'm not sure how best to solve the weird case when we have multiple
>> > adapters tied together, but perhaps something with extending how the
>> > ice_adapter lookup is done could work? Sergey, I think we can continue
>> > this design discussion off list and come up with a proposal.
>> > 
>> > We also have to be careful that whatever new solution also handles
>> > things which we got with auxiliary bus:
>> > 
>> > 1) prevent issues with ordering if a clock port loads before the clock
>> > owner PF. If the clock owner loads first, then we need to ensure the
>> > port still gets initialized. If the clock owner loads second, we need to
>> > avoid issues with things not being setup yet, i.e. ensure all the
>> > structures were already initialized (for example by initializing lists
>> > and such when we create the ice_adapter, not when the clock owner loads).
>> > 
>> > 2) prevent issues with teardown ordering that were previously serialized
>> > by the auxiliary bus unregister bits, where the driver unregister
>> > function would wait for all ports to shutdown.
>> > 
>> > I think this can be done correctly with ice_adapter, but I wanted to
>> > point them out because we get them implicitly with the current design
>> > with auxiliary bus.
>
Jacob Keller April 26, 2024, 10:25 p.m. UTC | #10
On 4/26/2024 6:43 AM, Jiri Pirko wrote:
> Fri, Apr 26, 2024 at 02:49:40PM CEST, przemyslaw.kitszel@intel.com wrote:
>> On 4/26/24 13:19, Jiri Pirko wrote:
>>> Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>>>>
>>>>
>>>> On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>>>>> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>>>>>
>>>>>>
>>>>>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>>>>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>>>>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>>>>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>>>>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>>>>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>>>>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>>>>>>
>>>>>>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>>>>>>> wrote:
>>>>>>>>>> Include segment/domain number in the device name to distinguish
>>>>>>>>> between
>>>>>>>>>> PCI devices located on different root complexes in multi-segment
>>>>>>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>>>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>>>>>>
>>>>>>>>> I don't understand why you need to encode pci properties of a parent device
>>>>>>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>>>>>>> you need a bus instance per PF?
>>>>>>>>>
>>>>>>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>>>>>>> have one bus for ice driver and that's it.
>>>>>>>>
>>>>>>>> This patch adds support for multi-segment PCIe configurations.
>>>>>>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>>>>>>>
>>>>>>> You are trying to change auxiliary bus name.
>>>>>>>
>>>>>>>
>>>>>>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>>>>>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>>>>>>>
>>>>>>> Why? It's the auxdev, the name should not contain anything related to
>>>>>>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>>>>>>>
>>>>>>
>>>>>> We aren't creating one auxbus per PF. We're creating one auxbus per
>>>>>> *clock*. The device has multiple clocks in some configurations.
>>>>>
>>>>> Does not matter. Why you need separate bus for whatever-instance? Why
>>>>> can't you just have one ice-ptp bus and put devices on it?
>>>>>
>>>>>
>>>>
>>>> Because we only want ports on card A to be connected to the card owner
>>>> on card A. We were using auxiliary bus to manage this. If we use a
>>>
>>> You do that by naming auxiliary bus according to the PCI device
>>> created it, which feels obviously wrong.
>>>
>>>
>>>> single bus for ice-ptp, then we still have to implement separation
>>>> between each set of devices on a single card, which doesn't solve the
>>>> problems we had, and at least with the current code using auxiliary bus
>>>> doesn't buy us much if it doesn't solve that problem.
>>>
>>> I don't think that auxiliary bus is the answer for your problem. Please
>>> don't abuse it.
>>>
>>>>
>>>>>>
>>>>>> We need to connect each PF to the same clock controller, because there
>>>>>> is only one clock owner for the device in a multi-port card.
>>>>>
>>>>> Connect how? But putting a PF-device on a per-clock bus? That sounds
>>>>> quite odd. How did you figure out this usage of auxiliary bus?
>>>>>
>>>>>
>>>>
>>>> Yea, its a multi-function board but the functions aren't fully
>>>> independent. Each port has its own PF, but multiple ports can be tied to
>>>> the same clock. We have similar problems with a variety of HW aspects.
>>>> I've been told that the design is simpler for other operating systems,
>>>> (something about the way the subsystems work so that they expect ports
>>>> to be tied to functions). But its definitely frustrating from Linux
>>>> perspective where a single driver instance for the device would be a lot
>>>> easier to manage.
>>>
>>> You can always do it by internal accounting in ice, merge multiple PF
>>> pci devices into one internal instance. Or checkout
>>> drivers/base/component.c, perhaps it could be extended for your usecase.
>>>
>>>
>>>>
>>>>>>
>>>>>>> Again, could you please avoid creating auxiliary bus per-PF and just
>>>>>>> have one auxiliary but per-ice-driver?
>>>>>>>
>>>>>>
>>>>>> We can't have one per-ice driver, because we don't want to connect ports
>>>>> >from a different device to the same clock. If you have two ice devices
>>>>>> plugged in, the ports on each device are separate from each other.
>>>>>>
>>>>>> The goal here is to connect the clock ports to the clock owner.
>>>>>>
>>>>>> Worse, as described here, we do have some devices which combine multiple
>>>>>> adapters together and tie their clocks together via HW signaling. In
>>>>>> those cases the clocks *do* need to communicate across the device, but
>>>>>> only to other ports on the same physical device, not to ports on a
>>>>>> different device.
>>>>>>
>>>>>> Perhaps auxbus is the wrong approach here? but how else can we connect
>>>>>
>>>>> Yeah, feels quite wrong.
>>>>>
>>>>>
>>>>>> these ports without over-connecting to other ports? We could write
>>>>>> bespoke code which finds these devices, but that felt like it was risky
>>>>>> and convoluted.
>>>>>
>>>>> Sounds you need something you have for DPLL subsystem. Feels to me that
>>>>> your hw design is quite disconnected from the Linux device model :/
>>>>>
>>>>
>>>> I'm not 100% sure how DPLL handles this. I'll have to investigate.
>>>
>>> DPLL leaves the merging on DPLL level. The ice driver just register
>>> entities multiple times. It is specifically designed to fit ice needs.
>>>
>>>
>>>>
>>>> One thing I've considered a lot in the past (such as back when I was
>>>> working on devlink flash update) was to somehow have a sort of extra
>>>> layer where we could register with PCI subsystem some sort of "whole
>>>> device" driver, that would get registered first and could connect to the
>>>> rest of the function driver instances as they load. But this seems like
>>>> it would need a lot of work in the PCI layer, and apparently hasn't been
>>>> an issue for other devices? though ice is far from unique at least for
>>>> Intel NICs. Its just that the devices got significantly more complex and
>>>> functions more interdependent with this generation, and the issues with
>>>> global bits were solved in other ways: often via hiding them with
>>>> firmware >:(
>>>
>>> I think that others could benefit from such "merged device" as well. I
>>> think it is about the time to introduce it.
>>
>> so far I see that we want to merge based on different bits, but let's
>> see what will come from exploratory work that Sergey is doing right now.
>>
>> and anything that is not a device/driver feels much more lightweight to
>> operate with (think &ice_adapter, but extended with more fields).
>> Could you elaborate more on what you have in mind? (Or what you could
>> imagine reusing).
> 
> Nothing concrete really. See below.
> 
>>
>> offtop:
>> It will be a good hook to put resources that are shared across ports
>> under it in devlink resources, so making this "merged device" an entity
>> would enable higher layer over what we have with devlink xxx $pf.
> 
> Yes. That would be great. I think we need a "device" in a sense of
> struct device instance. Then you can properly model the relationships in
> sysfs, you can have devlink for that, etc.
> 
> drivers/base/component.c does merging of multiple devices, but it does
> not create a "merged device", this is missing there. So we have 2
> options:
> 
> 1) extend drivers/base/component.c to allow to create a merged device
>    entity
> 2) implement merged device infrastructure separatelly.
> 
> IDK. I believe we need to rope more people in.
> 
> 

drivers/base/component.c looks pretty close to what we want. Each PF
would register as a component, and then a driver would register as the
component master. It does lack a struct device, so might be challenging
to use with devlink unless we just opted to pick a device from the
components as the main device?

extending components to have a device could be interesting, though
perhaps its not exactly the best place. It seems like components are
about combining a lot of small devices that ultimately combine into one
functionality at a logical level.

That is pretty close to what we want here: one entity to control global
portions of an otherwise multi-function card.

Extending it to include a struct device could work but I'm not 100% sure
how that fits into the component system.

We could try extending PCI subsystem to do something similar to
components which is vaguely what I described a couple replies ago.

ice_adapter is basically doing this but more bespoke and custom, and
still lacks the extra struct device.
Jiri Pirko April 29, 2024, 11:55 a.m. UTC | #11
Sat, Apr 27, 2024 at 12:25:44AM CEST, jacob.e.keller@intel.com wrote:
>
>
>On 4/26/2024 6:43 AM, Jiri Pirko wrote:
>> Fri, Apr 26, 2024 at 02:49:40PM CEST, przemyslaw.kitszel@intel.com wrote:
>>> On 4/26/24 13:19, Jiri Pirko wrote:
>>>> Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>>>>>
>>>>>
>>>>> On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>>>>>> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>>>>>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Jiri Pirko <jiri@resnulli.us>
>>>>>>>>>> Sent: Tuesday, April 23, 2024 1:36 PM
>>>>>>>>>> To: Temerkhanov, Sergey <sergey.temerkhanov@intel.com>
>>>>>>>>>> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Kitszel,
>>>>>>>>>> Przemyslaw <przemyslaw.kitszel@intel.com>
>>>>>>>>>> Subject: Re: [PATCH iwl-next v2] ice: Extend auxbus device naming
>>>>>>>>>>
>>>>>>>>>> Tue, Apr 23, 2024 at 11:14:59AM CEST, sergey.temerkhanov@intel.com
>>>>>>>>>> wrote:
>>>>>>>>>>> Include segment/domain number in the device name to distinguish
>>>>>>>>>> between
>>>>>>>>>>> PCI devices located on different root complexes in multi-segment
>>>>>>>>>>> configurations. Naming is changed from ptp_<bus>_<slot>_clk<clock>  to
>>>>>>>>>>> ptp_<domain>_<bus>_<slot>_clk<clock>
>>>>>>>>>>
>>>>>>>>>> I don't understand why you need to encode pci properties of a parent device
>>>>>>>>>> into the auxiliary bus name. Could you please explain the motivation? Why
>>>>>>>>>> you need a bus instance per PF?
>>>>>>>>>>
>>>>>>>>>> The rest of the auxbus registrators don't do this. Could you please align? Just
>>>>>>>>>> have one bus for ice driver and that's it.
>>>>>>>>>
>>>>>>>>> This patch adds support for multi-segment PCIe configurations.
>>>>>>>>> An auxdev is created for each adapter, which has a clock, in the system. There can be
>>>>>>>>
>>>>>>>> You are trying to change auxiliary bus name.
>>>>>>>>
>>>>>>>>
>>>>>>>>> more than one adapter present, so there exists a possibility of device naming conflict.
>>>>>>>>> To avoid it, auxdevs are named according to the PCI geographical addresses of the adapters.
>>>>>>>>
>>>>>>>> Why? It's the auxdev, the name should not contain anything related to
>>>>>>>> PCI, no reason for it. I asked for motivation, you didn't provide any.
>>>>>>>>
>>>>>>>
>>>>>>> We aren't creating one auxbus per PF. We're creating one auxbus per
>>>>>>> *clock*. The device has multiple clocks in some configurations.
>>>>>>
>>>>>> Does not matter. Why you need separate bus for whatever-instance? Why
>>>>>> can't you just have one ice-ptp bus and put devices on it?
>>>>>>
>>>>>>
>>>>>
>>>>> Because we only want ports on card A to be connected to the card owner
>>>>> on card A. We were using auxiliary bus to manage this. If we use a
>>>>
>>>> You do that by naming auxiliary bus according to the PCI device
>>>> created it, which feels obviously wrong.
>>>>
>>>>
>>>>> single bus for ice-ptp, then we still have to implement separation
>>>>> between each set of devices on a single card, which doesn't solve the
>>>>> problems we had, and at least with the current code using auxiliary bus
>>>>> doesn't buy us much if it doesn't solve that problem.
>>>>
>>>> I don't think that auxiliary bus is the answer for your problem. Please
>>>> don't abuse it.
>>>>
>>>>>
>>>>>>>
>>>>>>> We need to connect each PF to the same clock controller, because there
>>>>>>> is only one clock owner for the device in a multi-port card.
>>>>>>
>>>>>> Connect how? But putting a PF-device on a per-clock bus? That sounds
>>>>>> quite odd. How did you figure out this usage of auxiliary bus?
>>>>>>
>>>>>>
>>>>>
>>>>> Yea, its a multi-function board but the functions aren't fully
>>>>> independent. Each port has its own PF, but multiple ports can be tied to
>>>>> the same clock. We have similar problems with a variety of HW aspects.
>>>>> I've been told that the design is simpler for other operating systems,
>>>>> (something about the way the subsystems work so that they expect ports
>>>>> to be tied to functions). But its definitely frustrating from Linux
>>>>> perspective where a single driver instance for the device would be a lot
>>>>> easier to manage.
>>>>
>>>> You can always do it by internal accounting in ice, merge multiple PF
>>>> pci devices into one internal instance. Or checkout
>>>> drivers/base/component.c, perhaps it could be extended for your usecase.
>>>>
>>>>
>>>>>
>>>>>>>
>>>>>>>> Again, could you please avoid creating auxiliary bus per-PF and just
>>>>>>>> have one auxiliary but per-ice-driver?
>>>>>>>>
>>>>>>>
>>>>>>> We can't have one per-ice driver, because we don't want to connect ports
>>>>>> >from a different device to the same clock. If you have two ice devices
>>>>>>> plugged in, the ports on each device are separate from each other.
>>>>>>>
>>>>>>> The goal here is to connect the clock ports to the clock owner.
>>>>>>>
>>>>>>> Worse, as described here, we do have some devices which combine multiple
>>>>>>> adapters together and tie their clocks together via HW signaling. In
>>>>>>> those cases the clocks *do* need to communicate across the device, but
>>>>>>> only to other ports on the same physical device, not to ports on a
>>>>>>> different device.
>>>>>>>
>>>>>>> Perhaps auxbus is the wrong approach here? but how else can we connect
>>>>>>
>>>>>> Yeah, feels quite wrong.
>>>>>>
>>>>>>
>>>>>>> these ports without over-connecting to other ports? We could write
>>>>>>> bespoke code which finds these devices, but that felt like it was risky
>>>>>>> and convoluted.
>>>>>>
>>>>>> Sounds you need something you have for DPLL subsystem. Feels to me that
>>>>>> your hw design is quite disconnected from the Linux device model :/
>>>>>>
>>>>>
>>>>> I'm not 100% sure how DPLL handles this. I'll have to investigate.
>>>>
>>>> DPLL leaves the merging on DPLL level. The ice driver just register
>>>> entities multiple times. It is specifically designed to fit ice needs.
>>>>
>>>>
>>>>>
>>>>> One thing I've considered a lot in the past (such as back when I was
>>>>> working on devlink flash update) was to somehow have a sort of extra
>>>>> layer where we could register with PCI subsystem some sort of "whole
>>>>> device" driver, that would get registered first and could connect to the
>>>>> rest of the function driver instances as they load. But this seems like
>>>>> it would need a lot of work in the PCI layer, and apparently hasn't been
>>>>> an issue for other devices? though ice is far from unique at least for
>>>>> Intel NICs. Its just that the devices got significantly more complex and
>>>>> functions more interdependent with this generation, and the issues with
>>>>> global bits were solved in other ways: often via hiding them with
>>>>> firmware >:(
>>>>
>>>> I think that others could benefit from such "merged device" as well. I
>>>> think it is about the time to introduce it.
>>>
>>> so far I see that we want to merge based on different bits, but let's
>>> see what will come from exploratory work that Sergey is doing right now.
>>>
>>> and anything that is not a device/driver feels much more lightweight to
>>> operate with (think &ice_adapter, but extended with more fields).
>>> Could you elaborate more on what you have in mind? (Or what you could
>>> imagine reusing).
>> 
>> Nothing concrete really. See below.
>> 
>>>
>>> offtop:
>>> It will be a good hook to put resources that are shared across ports
>>> under it in devlink resources, so making this "merged device" an entity
>>> would enable higher layer over what we have with devlink xxx $pf.
>> 
>> Yes. That would be great. I think we need a "device" in a sense of
>> struct device instance. Then you can properly model the relationships in
>> sysfs, you can have devlink for that, etc.
>> 
>> drivers/base/component.c does merging of multiple devices, but it does
>> not create a "merged device", this is missing there. So we have 2
>> options:
>> 
>> 1) extend drivers/base/component.c to allow to create a merged device
>>    entity
>> 2) implement merged device infrastructure separatelly.
>> 
>> IDK. I believe we need to rope more people in.
>> 
>> 
>
>drivers/base/component.c looks pretty close to what we want. Each PF
>would register as a component, and then a driver would register as the
>component master. It does lack a struct device, so might be challenging
>to use with devlink unless we just opted to pick a device from the
>components as the main device?

If I read the code correctly, the master component has to be a device as
well. This is the missing piece I believe.


>
>extending components to have a device could be interesting, though
>perhaps its not exactly the best place. It seems like components are
>about combining a lot of small devices that ultimately combine into one
>functionality at a logical level.
>
>That is pretty close to what we want here: one entity to control global
>portions of an otherwise multi-function card.
>
>Extending it to include a struct device could work but I'm not 100% sure
>how that fits into the component system.

Who knows? we need to rope them into this discussion...


>
>We could try extending PCI subsystem to do something similar to
>components which is vaguely what I described a couple replies ago.

Well, feels to me this is more generic problem than PCI. One level
above.


>
>ice_adapter is basically doing this but more bespoke and custom, and
>still lacks the extra struct device.

Correct.
Jacob Keller April 29, 2024, 10:28 p.m. UTC | #12
Hi,

I'm attempting to widen the audience a bit for a discussion about the
ice driver and its current (ab)use and future of using auxiliary bus to
manage cross-function inter-dependencies.

I've included the latest discussion with Jiri, but the full context can
be read from lore.kernel.org:

https://lore.kernel.org/intel-wired-lan/20240423091459.72216-1-sergey.temerkhanov@intel.com/

There is also related work from Karol for the 2xNAC case discussed below:

https://lore.kernel.org/intel-wired-lan/20240424133542.113933-26-karol.kolacinski@intel.com/

As a short summary, ice is currently (as of at least commit d938a8cca88a
("ice: Auxbus devices & driver for E822 TS") merged in v6.7) using
auxiliary bus to handle some challenges we have when dealing with the
multi-function hardware that has global functionality that cannot be
independently controlled by each function. The specific context is the
IEEE 1588 PTP Hardware Clock functionality, but there are other areas of
the device which have similar challenges.

Other auxiliary driver and bus implementations are intended for
abstracting some device functionality into a separate driver. A single
module creates a bus, and then devices connect to it, each device then
talks to that driver.

In the ice model, each PF that owns a clock creates its own bus, and
then each port (including the PF that owns the clock) creates a device
on the matching bus for that physical PCI device.

As part of developing a new product, we were refactoring this auxiliary
bus implementation when Jiri pointed out that the entire use of
auxiliary bus seems incorrect. We are generating the bus name so that it
includes information about the PCI device, in order to ensure that ports
connecting to the bus connect to the driver "driver". In addition we're
basically *only* using this to get a reference to each driver without
really providing a coherent logical separation of functionality.

The future 2xNAC product complicates things even further, as we have in
that case multiple chips on the board, called NACs, and each of these is
a PCI device with its own functions. The hardware clock on NAC 0 is
connected to NAC 1, so that functions on NAC 1 ports share the same
clock domain as the functions on NAC 0. So in this case not only do we
need to tie functions on the same multi-function device together, but we
also need to in some boards to tie two sets of functions across two
devices together.

Jiri's arguments in the above thread have convinced me that we should
not be using auxiliary bus to solve this problem. It was never intended
to solve this kind of problem.

Fundamentally the issue is that we have a PCI device with multiple
functions, but these functions are not fully independent. Some parts of
the functionality cannot be correctly managed by all functions at once,
and functions need to be aware of what the other functions are doing.
For PTP, this means one function controlling and exposing the PTP
hardware clock which is used for timestamping across multiple functions.
There are also various global registers which modifying affects the
entire device. This is not managed very well in the existing drivers,
and breaks the simple PCI model of functions being independent devices.

Recently, Michal added struct ice_adapter to the driver. This is a
reference counted structure which each function acquires as it loads,
based on the PCI information so that all functions on a device get the
same ice_adapter. Sergey is currently investigating refactoring the rest
of the PTP logic to use ice_adapter instead of auxiliary bus.

Jiri also pointed out the component logic in drivers/base/component.c
which seems to be a subsystem extension to manage a related problem of
multiple devices that work together to provide functionality of an
aggregate device.

The component code doesn't seem to quite match what we want for ice, as
it requires the aggregate device to register. In the ice case, we might
have 2, 4, or 8 functions each with a pci_dev and then no struct device
to represent the combined adapter.

I also have considered something like an extension of the PCI driver
model to allow adding a combined instance that manages the device so
we'd have a sort of like "adapter driver" and a "function driver" or
similar. Jiri pointed out that the problem feels a bit more generic and
sort of "above" the PCI layer though.

Essentially, we want something like a device subsystem improvement where
we can have each function register to connect to a combined manager
device which can control the global functionality of the device. In the
2xNAC case, this would need to cross both NAC devices.

It is likely we can extend ice_adapter to do this for the ice driver,
but this would then be a bespoke device specific solution. It also
doesn't provide the struct device. We could re-use the struct device
from function 0, but then we aren't really representing topology of the
connected devices as neatly.

While the focus is currently on the PTP case, there are also some other
bits that could be improved such as exposing only a single devlink
instance for the whole device instead of one devlink instance per function.

I'm hoping we can get some direction on what method we should pursue,
and possibly pointers to folks who might have an interest in this, and
could help us figure out the path towards a better solution.

For now, Sergey is going to continue prototyping and experimenting with
the ice_adapter implementation.

Here follows the most recent part of the discussion:

On 4/29/2024 4:55 AM, Jiri Pirko wrote:
> Sat, Apr 27, 2024 at 12:25:44AM CEST, jacob.e.keller@intel.com wrote:
>> On 4/26/2024 6:43 AM, Jiri Pirko wrote:
>>> Fri, Apr 26, 2024 at 02:49:40PM CEST, przemyslaw.kitszel@intel.com wrote:
>>>> On 4/26/24 13:19, Jiri Pirko wrote:
>>>>> Wed, Apr 24, 2024 at 06:56:37PM CEST, jacob.e.keller@intel.com wrote:
>>>>>> On 4/24/2024 8:07 AM, Jiri Pirko wrote:
>>>>>>> Wed, Apr 24, 2024 at 12:03:25AM CEST, jacob.e.keller@intel.com wrote:
>>>>>>>> On 4/23/2024 6:14 AM, Jiri Pirko wrote:
>>>>>>>>> Tue, Apr 23, 2024 at 01:56:55PM CEST, sergey.temerkhanov@intel.com wrote:
>>>>
>>>> offtop:
>>>> It will be a good hook to put resources that are shared across ports
>>>> under it in devlink resources, so making this "merged device" an entity
>>>> would enable higher layer over what we have with devlink xxx $pf.
>>>
>>> Yes. That would be great. I think we need a "device" in a sense of
>>> struct device instance. Then you can properly model the relationships in
>>> sysfs, you can have devlink for that, etc.
>>>
>>> drivers/base/component.c does merging of multiple devices, but it does
>>> not create a "merged device", this is missing there. So we have 2
>>> options:
>>>
>>> 1) extend drivers/base/component.c to allow to create a merged device
>>>    entity
>>> 2) implement merged device infrastructure separatelly.
>>>
>>> IDK. I believe we need to rope more people in.
>>>
>>>
>>
>> drivers/base/component.c looks pretty close to what we want. Each PF
>> would register as a component, and then a driver would register as the
>> component master. It does lack a struct device, so might be challenging
>> to use with devlink unless we just opted to pick a device from the
>> components as the main device?
> 
> If I read the code correctly, the master component has to be a device as
> well. This is the missing piece I believe.
> 
> 
>>
>> extending components to have a device could be interesting, though
>> perhaps its not exactly the best place. It seems like components are
>> about combining a lot of small devices that ultimately combine into one
>> functionality at a logical level.
>>
>> That is pretty close to what we want here: one entity to control global
>> portions of an otherwise multi-function card.
>>
>> Extending it to include a struct device could work but I'm not 100% sure
>> how that fits into the component system.
> 
> Who knows? we need to rope them into this discussion...
> 



> 
>>
>> We could try extending PCI subsystem to do something similar to
>> components which is vaguely what I described a couple replies ago.
> 
> Well, feels to me this is more generic problem than PCI. One level
> above.
> 
> 
>>
>> ice_adapter is basically doing this but more bespoke and custom, and
>> still lacks the extra struct device.
> 
> Correct.
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 402436b72322..744b102f7636 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2993,8 +2993,9 @@  ice_ptp_auxbus_create_id_table(struct ice_pf *pf, const char *name)
 static int ice_ptp_register_auxbus_driver(struct ice_pf *pf)
 {
 	struct auxiliary_driver *aux_driver;
+	struct pci_dev *pdev = pf->pdev;
 	struct ice_ptp *ptp;
-	char busdev[8] = {};
+	char busdev[16] = {};
 	struct device *dev;
 	char *name;
 	int err;
@@ -3005,8 +3006,10 @@  static int ice_ptp_register_auxbus_driver(struct ice_pf *pf)
 	INIT_LIST_HEAD(&ptp->ports_owner.ports);
 	mutex_init(&ptp->ports_owner.lock);
 	if (ice_is_e810(&pf->hw))
-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
-			PCI_SLOT(pf->pdev->devfn));
+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
+			 pci_domain_nr(pdev->bus),
+			 pdev->bus->number,
+			 PCI_SLOT(pdev->devfn));
 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
 			      ice_get_ptp_src_clock_index(&pf->hw));
 	if (!name)
@@ -3210,8 +3213,9 @@  static void ice_ptp_release_auxbus_device(struct device *dev)
 static int ice_ptp_create_auxbus_device(struct ice_pf *pf)
 {
 	struct auxiliary_device *aux_dev;
+	struct pci_dev *pdev = pf->pdev;
 	struct ice_ptp *ptp;
-	char busdev[8] = {};
+	char busdev[16] = {};
 	struct device *dev;
 	char *name;
 	int err;
@@ -3224,8 +3228,10 @@  static int ice_ptp_create_auxbus_device(struct ice_pf *pf)
 	aux_dev = &ptp->port.aux_dev;
 
 	if (ice_is_e810(&pf->hw))
-		sprintf(busdev, "%u_%u_", pf->pdev->bus->number,
-			PCI_SLOT(pf->pdev->devfn));
+		snprintf(busdev, sizeof(busdev), "%u_%u_%u_",
+			 pci_domain_nr(pdev->bus),
+			 pdev->bus->number,
+			 PCI_SLOT(pdev->devfn));
 
 	name = devm_kasprintf(dev, GFP_KERNEL, "ptp_%sclk%u", busdev,
 			      ice_get_ptp_src_clock_index(&pf->hw));