diff mbox series

[for-next,v2] IB/core: Only update PKEY and GID caches on respective events

Message ID 1620289904-27687-1-git-send-email-haakon.bugge@oracle.com (mailing list archive)
State Superseded
Headers show
Series [for-next,v2] IB/core: Only update PKEY and GID caches on respective events | expand

Commit Message

Haakon Bugge May 6, 2021, 8:31 a.m. UTC
Both the PKEY and GID tables in an HCA can hold on the order of
hundreds of entries. Reading them is expensive. Partly because the API
for retrieving them only returns a single entry at a time. Further, on
certain implementations, e.g., CX-3, the VFs are paravirtualized in
this respect and have to rely on the PF driver to perform the
read. This again demands VF to PF communication.

IB Core's cache is refreshed on all events. Hence, filter the refresh
of the PKEY and GID caches based on the event received being
IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>

---

v1 -> v2:
   * Changed signature of ib_cache_update() as per Leon's suggestion
   * Added Fixes tag as per Zhu Yanjun's suggestion
---
 drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

Comments

Leon Romanovsky May 6, 2021, 11:27 a.m. UTC | #1
On Thu, May 06, 2021 at 10:31:44AM +0200, Håkon Bugge wrote:
> Both the PKEY and GID tables in an HCA can hold in the order of
> hundreds entries. Reading them are expensive. Partly because the API
> for retrieving them only returns a single entry at a time. Further, on
> certain implementations, e.g., CX-3, the VFs are paravirtualized in
> this respect and have to rely on the PF driver to perform the
> read. This again demands VF to PF communication.
> 
> IB Core's cache is refreshed on all events. Hence, filter the refresh
> of the PKEY and GID caches based on the event received being
> IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> 
> ---
> 
> v1 -> v2:
>    * Changed signature of ib_cache_update() as per Leon's suggestion
>    * Added Fixes tag as per Zhu Yanjun' suggestion
> ---
>  drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index 5c9fac7..1493a60 100644
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -1472,10 +1472,12 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  }
>  
>  static int
> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
> +ib_cache_update(struct ib_device *device, u8 port, bool update_gids,
> +		bool update_pkeys, bool enforce_security)
>  {
>  	struct ib_port_attr       *tprops = NULL;
> -	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
> +	struct ib_pkey_cache      *pkey_cache = NULL;
> +	struct ib_pkey_cache      *old_pkey_cache = NULL;
>  	int                        i;
>  	int                        ret;
>  
> @@ -1492,14 +1494,16 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  		goto err;
>  	}
>  
> -	if (!rdma_protocol_roce(device, port)) {
> +	if (!rdma_protocol_roce(device, port) && update_gids) {

Can you please elaborate why it is safe to do for IB_EVENT_GID_CHANGE only?
What about IB_EVENT_CLIENT_REREGISTER?

Thanks
Haakon Bugge May 6, 2021, 12:22 p.m. UTC | #2
> On 6 May 2021, at 13:27, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Thu, May 06, 2021 at 10:31:44AM +0200, Håkon Bugge wrote:
>> Both the PKEY and GID tables in an HCA can hold in the order of
>> hundreds entries. Reading them are expensive. Partly because the API
>> for retrieving them only returns a single entry at a time. Further, on
>> certain implementations, e.g., CX-3, the VFs are paravirtualized in
>> this respect and have to rely on the PF driver to perform the
>> read. This again demands VF to PF communication.
>> 
>> IB Core's cache is refreshed on all events. Hence, filter the refresh
>> of the PKEY and GID caches based on the event received being
>> IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.
>> 
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> 
>> ---
>> 
>> v1 -> v2:
>>   * Changed signature of ib_cache_update() as per Leon's suggestion
>>   * Added Fixes tag as per Zhu Yanjun' suggestion
>> ---
>> drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
>> 1 file changed, 15 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
>> index 5c9fac7..1493a60 100644
>> --- a/drivers/infiniband/core/cache.c
>> +++ b/drivers/infiniband/core/cache.c
>> @@ -1472,10 +1472,12 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>> }
>> 
>> static int
>> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
>> +ib_cache_update(struct ib_device *device, u8 port, bool update_gids,
>> +		bool update_pkeys, bool enforce_security)
>> {
>> 	struct ib_port_attr       *tprops = NULL;
>> -	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
>> +	struct ib_pkey_cache      *pkey_cache = NULL;
>> +	struct ib_pkey_cache      *old_pkey_cache = NULL;
>> 	int                        i;
>> 	int                        ret;
>> 
>> @@ -1492,14 +1494,16 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>> 		goto err;
>> 	}
>> 
>> -	if (!rdma_protocol_roce(device, port)) {
>> +	if (!rdma_protocol_roce(device, port) && update_gids) {
> 
> Can you please elaborate why it is safe to do for IB_EVENT_GID_CHANGE only?
> What about IB_EVENT_CLIENT_REREGISTER?

Client Reregister tells host clients to re-register all subscriptions for said port with the SM. No reason to re-read the GID table in that case.

Or, to put it another way: when the GID table changes, the channel interface _shall_ generate a GID change event. This is per IBTA o11-40.2-1.1.


Thxs, Håkon

> 
> Thanks
Leon Romanovsky May 9, 2021, 7:47 a.m. UTC | #3
On Thu, May 06, 2021 at 10:31:44AM +0200, Håkon Bugge wrote:
> Both the PKEY and GID tables in an HCA can hold in the order of
> hundreds entries. Reading them are expensive. Partly because the API
> for retrieving them only returns a single entry at a time. Further, on
> certain implementations, e.g., CX-3, the VFs are paravirtualized in
> this respect and have to rely on the PF driver to perform the
> read. This again demands VF to PF communication.
> 
> IB Core's cache is refreshed on all events. Hence, filter the refresh
> of the PKEY and GID caches based on the event received being
> IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> 
> ---
> 
> v1 -> v2:
>    * Changed signature of ib_cache_update() as per Leon's suggestion
>    * Added Fixes tag as per Zhu Yanjun' suggestion
> ---
>  drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 

Thanks,
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Haakon Bugge May 24, 2021, 10:26 a.m. UTC | #4
> On 9 May 2021, at 09:47, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Thu, May 06, 2021 at 10:31:44AM +0200, Håkon Bugge wrote:
>> Both the PKEY and GID tables in an HCA can hold in the order of
>> hundreds entries. Reading them are expensive. Partly because the API
>> for retrieving them only returns a single entry at a time. Further, on
>> certain implementations, e.g., CX-3, the VFs are paravirtualized in
>> this respect and have to rely on the PF driver to perform the
>> read. This again demands VF to PF communication.
>> 
>> IB Core's cache is refreshed on all events. Hence, filter the refresh
>> of the PKEY and GID caches based on the event received being
>> IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.
>> 
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> 
>> ---
>> 
>> v1 -> v2:
>>   * Changed signature of ib_cache_update() as per Leon's suggestion
>>   * Added Fixes tag as per Zhu Yanjun' suggestion
>> ---
>> drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
>> 1 file changed, 15 insertions(+), 8 deletions(-)
>> 
> 
> Thanks,
> Reviewed-by: Leon Romanovsky <leonro@nvidia.com>

I saw a handful commits being applied for-next. Anything needed from my side here?



Thxs, Håkon
Jason Gunthorpe May 25, 2021, 3:12 p.m. UTC | #5
On Mon, May 24, 2021 at 10:26:16AM +0000, Haakon Bugge wrote:
> 
> 
> > On 9 May 2021, at 09:47, Leon Romanovsky <leon@kernel.org> wrote:
> > 
> > On Thu, May 06, 2021 at 10:31:44AM +0200, Håkon Bugge wrote:
> >> Both the PKEY and GID tables in an HCA can hold in the order of
> >> hundreds entries. Reading them are expensive. Partly because the API
> >> for retrieving them only returns a single entry at a time. Further, on
> >> certain implementations, e.g., CX-3, the VFs are paravirtualized in
> >> this respect and have to rely on the PF driver to perform the
> >> read. This again demands VF to PF communication.
> >> 
> >> IB Core's cache is refreshed on all events. Hence, filter the refresh
> >> of the PKEY and GID caches based on the event received being
> >> IB_EVENT_PKEY_CHANGE and IB_EVENT_GID_CHANGE respectively.
> >> 
> >> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> >> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> >> 
> >> 
> >> v1 -> v2:
> >>   * Changed signature of ib_cache_update() as per Leon's suggestion
> >>   * Added Fixes tag as per Zhu Yanjun' suggestion
> >> drivers/infiniband/core/cache.c | 23 +++++++++++++++--------
> >> 1 file changed, 15 insertions(+), 8 deletions(-)
> >> 
> > 
> > Thanks,
> > Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
> 
> I saw a handful commits being applied for-next. Anything needed from my side here?

It doesn't apply, please resend it

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5c9fac7..1493a60 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1472,10 +1472,12 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 }
 
 static int
-ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
+ib_cache_update(struct ib_device *device, u8 port, bool update_gids,
+		bool update_pkeys, bool enforce_security)
 {
 	struct ib_port_attr       *tprops = NULL;
-	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
+	struct ib_pkey_cache      *pkey_cache = NULL;
+	struct ib_pkey_cache      *old_pkey_cache = NULL;
 	int                        i;
 	int                        ret;
 
@@ -1492,14 +1494,16 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 		goto err;
 	}
 
-	if (!rdma_protocol_roce(device, port)) {
+	if (!rdma_protocol_roce(device, port) && update_gids) {
 		ret = config_non_roce_gid_cache(device, port,
 						tprops->gid_tbl_len);
 		if (ret)
 			goto err;
 	}
 
-	if (tprops->pkey_tbl_len) {
+	update_pkeys &= !!tprops->pkey_tbl_len;
+
+	if (update_pkeys) {
 		pkey_cache = kmalloc(struct_size(pkey_cache, table,
 						 tprops->pkey_tbl_len),
 				     GFP_KERNEL);
@@ -1524,9 +1528,10 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 
 	write_lock_irq(&device->cache_lock);
 
-	old_pkey_cache = device->port_data[port].cache.pkey;
-
-	device->port_data[port].cache.pkey = pkey_cache;
+	if (update_pkeys) {
+		old_pkey_cache = device->port_data[port].cache.pkey;
+		device->port_data[port].cache.pkey = pkey_cache;
+	}
 	device->port_data[port].cache.lmc = tprops->lmc;
 	device->port_data[port].cache.port_state = tprops->state;
 
@@ -1558,6 +1563,8 @@  static void ib_cache_event_task(struct work_struct *_work)
 	 * the cache.
 	 */
 	ret = ib_cache_update(work->event.device, work->event.element.port_num,
+			      work->event.event == IB_EVENT_GID_CHANGE,
+			      work->event.event == IB_EVENT_PKEY_CHANGE,
 			      work->enforce_security);
 
 	/* GID event is notified already for individual GID entries by
@@ -1631,7 +1638,7 @@  int ib_cache_setup_one(struct ib_device *device)
 		return err;
 
 	rdma_for_each_port (device, p) {
-		err = ib_cache_update(device, p, true);
+		err = ib_cache_update(device, p, true, true, true);
 		if (err)
 			return err;
 	}