diff mbox series

[v11,04/14] s390/zcrypt: driver callback to indicate resource in use

Message ID 20201022171209.19494-5-akrowiak@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390/vfio-ap: dynamic configuration support | expand

Commit Message

Anthony Krowiak Oct. 22, 2020, 5:11 p.m. UTC
Introduces a new driver callback to prevent a root user from unbinding
an AP queue from its device driver if the queue is in use. The callback
will be invoked whenever a change to the AP bus's sysfs apmask or aqmask
attributes would result in one or more AP queues being removed from its
driver. If the callback responds in the affirmative for any driver
queried, the change to the apmask or aqmask will be rejected with a device
in use error.

For this patch, only non-default drivers will be queried. Currently,
there is only one non-default driver, the vfio_ap device driver. The
vfio_ap device driver facilitates pass-through of an AP queue to a
guest. The idea here is that a guest may be administered by a different
sysadmin than the host and we don't want AP resources to unexpectedly
disappear from a guest's AP configuration (i.e., adapters and domains
assigned to the matrix mdev). This will enforce the proper procedure for
removing AP resources intended for guest usage which is to
first unassign them from the matrix mdev, then unbind them from the
vfio_ap device driver.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
---
 drivers/s390/crypto/ap_bus.c | 148 ++++++++++++++++++++++++++++++++---
 drivers/s390/crypto/ap_bus.h |   4 +
 2 files changed, 142 insertions(+), 10 deletions(-)

Comments

Halil Pasic Oct. 27, 2020, 1:01 p.m. UTC | #1
On Thu, 22 Oct 2020 13:11:59 -0400
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> Introduces a new driver callback to prevent a root user from unbinding
> an AP queue from its device driver if the queue is in use. The callback
> will be invoked whenever a change to the AP bus's sysfs apmask or aqmask
> attributes would result in one or more AP queues being removed from its
> driver. If the callback responds in the affirmative for any driver
> queried, the change to the apmask or aqmask will be rejected with a device
> in use error.

Like discussed last time, there seems to be nothing, that would prevent
a resource becoming in use between the in_use() callback returned false
and the resource being removed as a result of ap_bus_revise_bindings().

Another thing that may be of interest, is that now we hold the
ap_perms_mutex for the in_use() checks. The ap_perms_mutex is used
in ap_device_probe() and I don't quite understand some
usages of in zcrypt_api.c My feeling is that the extra pressure on that
lock should not be a problem, except if in_use() were to not return
because of some deadlock.

With all that said if Harald is fine with it, so am I.

Acked-by: Halil Pasic <pasic@linux.ibm.com>

> 
> For this patch, only non-default drivers will be queried. Currently,
> there is only one non-default driver, the vfio_ap device driver. The
> vfio_ap device driver facilitates pass-through of an AP queue to a
> guest. The idea here is that a guest may be administered by a different
> sysadmin than the host and we don't want AP resources to unexpectedly
> disappear from a guest's AP configuration (i.e., adapters and domains
> assigned to the matrix mdev). This will enforce the proper procedure for
> removing AP resources intended for guest usage which is to
> first unassign them from the matrix mdev, then unbind them from the
> vfio_ap device driver.
> 
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
>
Harald Freudenberger Oct. 27, 2020, 4:55 p.m. UTC | #2
On 22.10.20 19:11, Tony Krowiak wrote:
> Introduces a new driver callback to prevent a root user from unbinding
> an AP queue from its device driver if the queue is in use. The callback
> will be invoked whenever a change to the AP bus's sysfs apmask or aqmask
> attributes would result in one or more AP queues being removed from its
> driver. If the callback responds in the affirmative for any driver
> queried, the change to the apmask or aqmask will be rejected with a device
> in use error.
>
> For this patch, only non-default drivers will be queried. Currently,
> there is only one non-default driver, the vfio_ap device driver. The
> vfio_ap device driver facilitates pass-through of an AP queue to a
> guest. The idea here is that a guest may be administered by a different
> sysadmin than the host and we don't want AP resources to unexpectedly
> disappear from a guest's AP configuration (i.e., adapters and domains
> assigned to the matrix mdev). This will enforce the proper procedure for
> removing AP resources intended for guest usage which is to
> first unassign them from the matrix mdev, then unbind them from the
> vfio_ap device driver.
>
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> ---
>  drivers/s390/crypto/ap_bus.c | 148 ++++++++++++++++++++++++++++++++---
>  drivers/s390/crypto/ap_bus.h |   4 +
>  2 files changed, 142 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
> index 485cbfcbf06e..998e61cd86d9 100644
> --- a/drivers/s390/crypto/ap_bus.c
> +++ b/drivers/s390/crypto/ap_bus.c
> @@ -35,6 +35,7 @@
>  #include <linux/mod_devicetable.h>
>  #include <linux/debugfs.h>
>  #include <linux/ctype.h>
> +#include <linux/module.h>
>  
>  #include "ap_bus.h"
>  #include "ap_debug.h"
> @@ -893,6 +894,23 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits)
>  	return 0;
>  }
>  
> +static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits,
> +			       unsigned long *newmap)
> +{
> +	unsigned long size;
> +	int rc;
> +
> +	size = BITS_TO_LONGS(bits)*sizeof(unsigned long);
> +	if (*str == '+' || *str == '-') {
> +		memcpy(newmap, bitmap, size);
> +		rc = modify_bitmap(str, newmap, bits);
> +	} else {
> +		memset(newmap, 0, size);
> +		rc = hex2bitmap(str, newmap, bits);
> +	}
> +	return rc;
> +}
> +
>  int ap_parse_mask_str(const char *str,
>  		      unsigned long *bitmap, int bits,
>  		      struct mutex *lock)
> @@ -912,14 +930,7 @@ int ap_parse_mask_str(const char *str,
>  		kfree(newmap);
>  		return -ERESTARTSYS;
>  	}
> -
> -	if (*str == '+' || *str == '-') {
> -		memcpy(newmap, bitmap, size);
> -		rc = modify_bitmap(str, newmap, bits);
> -	} else {
> -		memset(newmap, 0, size);
> -		rc = hex2bitmap(str, newmap, bits);
> -	}
> +	rc = ap_parse_bitmap_str(str, bitmap, bits, newmap);
>  	if (rc == 0)
>  		memcpy(bitmap, newmap, size);
>  	mutex_unlock(lock);
> @@ -1111,12 +1122,70 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf)
>  	return rc;
>  }
>  
> +static int __verify_card_reservations(struct device_driver *drv, void *data)
> +{
> +	int rc = 0;
> +	struct ap_driver *ap_drv = to_ap_drv(drv);
> +	unsigned long *newapm = (unsigned long *)data;
> +
> +	/*
> +	 * No need to verify whether the driver is using the queues if it is the
> +	 * default driver.
> +	 */
> +	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
> +		return 0;
> +
> +	/* The non-default driver's module must be loaded */
Can you please update this comment? It should be something like
/* increase the driver's module refcounter to be sure it is not
   going away when we invoke the callback function. */

> +	if (!try_module_get(drv->owner))
> +		return 0;
> +
> +	if (ap_drv->in_use)
> +		if (ap_drv->in_use(newapm, ap_perms.aqm))
> +			rc = -EBUSY;
> +
And here: /* release driver's module */ or simmilar
> +	module_put(drv->owner);
> +
> +	return rc;
> +}
> +
> +static int apmask_commit(unsigned long *newapm)
> +{
> +	int rc;
> +	unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)];
> +
> +	/*
> +	 * Check if any bits in the apmask have been set which will
> +	 * result in queues being removed from non-default drivers
> +	 */
> +	if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) {
> +		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
> +				      __verify_card_reservations);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	memcpy(ap_perms.apm, newapm, APMASKSIZE);
> +
> +	return 0;
> +}
> +
>  static ssize_t apmask_store(struct bus_type *bus, const char *buf,
>  			    size_t count)
>  {
>  	int rc;
> +	DECLARE_BITMAP(newapm, AP_DEVICES);
> +
> +	if (mutex_lock_interruptible(&ap_perms_mutex))
> +		return -ERESTARTSYS;
> +
> +	rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm);
> +	if (rc)
> +		goto done;
>  
> -	rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex);
> +	rc = apmask_commit(newapm);
> +
> +done:
> +	mutex_unlock(&ap_perms_mutex);
>  	if (rc)
>  		return rc;
>  
> @@ -1142,12 +1211,71 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf)
>  	return rc;
>  }
>  
> +static int __verify_queue_reservations(struct device_driver *drv, void *data)
> +{
> +	int rc = 0;
> +	struct ap_driver *ap_drv = to_ap_drv(drv);
> +	unsigned long *newaqm = (unsigned long *)data;
> +
> +	/*
> +	 * If the reserved bits do not identify queues reserved for use by the
> +	 * non-default driver, there is no need to verify the driver is using
> +	 * the queues.
> +	 */
> +	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
> +		return 0;
> +
> +	/* The non-default driver's module must be loaded */
Same here.
> +	if (!try_module_get(drv->owner))
> +		return 0;
> +
> +	if (ap_drv->in_use)
> +		if (ap_drv->in_use(ap_perms.apm, newaqm))
> +			rc = -EBUSY;
> +
and here
> +	module_put(drv->owner);
> +
> +	return rc;
> +}
> +
> +static int aqmask_commit(unsigned long *newaqm)
> +{
> +	int rc;
> +	unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)];
> +
> +	/*
> +	 * Check if any bits in the aqmask have been set which will
> +	 * result in queues being removed from non-default drivers
> +	 */
> +	if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) {
> +		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
> +				      __verify_queue_reservations);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	memcpy(ap_perms.aqm, newaqm, AQMASKSIZE);
> +
> +	return 0;
> +}
> +
>  static ssize_t aqmask_store(struct bus_type *bus, const char *buf,
>  			    size_t count)
>  {
>  	int rc;
> +	DECLARE_BITMAP(newaqm, AP_DOMAINS);
>  
> -	rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex);
> +	if (mutex_lock_interruptible(&ap_perms_mutex))
> +		return -ERESTARTSYS;
> +
> +	rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm);
> +	if (rc)
> +		goto done;
> +
> +	rc = aqmask_commit(newaqm);
> +
> +done:
> +	mutex_unlock(&ap_perms_mutex);
>  	if (rc)
>  		return rc;
>  
> diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
> index 5029b80132aa..6ce154d924d3 100644
> --- a/drivers/s390/crypto/ap_bus.h
> +++ b/drivers/s390/crypto/ap_bus.h
> @@ -145,6 +145,7 @@ struct ap_driver {
>  
>  	int (*probe)(struct ap_device *);
>  	void (*remove)(struct ap_device *);
> +	bool (*in_use)(unsigned long *apm, unsigned long *aqm);
>  };
>  
>  #define to_ap_drv(x) container_of((x), struct ap_driver, driver)
> @@ -293,6 +294,9 @@ void ap_queue_init_state(struct ap_queue *aq);
>  struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type,
>  			       int comp_device_type, unsigned int functions);
>  
> +#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long))
> +#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long))
> +
>  struct ap_perms {
>  	unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)];
>  	unsigned long apm[BITS_TO_LONGS(AP_DEVICES)];
I still don't like this code. That's because of what it is doing - not because of the code quality.
And Halil, you are right. It is adding more pressure to the mutex used for locking the apmask
and aqmask stuff (and the zcrypt multiple device drivers support code also).
I am very concerned about the in_use callback which is called with the ap_perms_mutex
held AND during bus_for_each_drv (so holding the overall AP BUS mutex) and then diving
into the vfio_ap ... with yet another mutex to protect the vfio structs.
Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>
Anthony Krowiak Nov. 13, 2020, 9:30 p.m. UTC | #3
On 10/27/20 12:55 PM, Harald Freudenberger wrote:
> On 22.10.20 19:11, Tony Krowiak wrote:
>> Introduces a new driver callback to prevent a root user from unbinding
>> an AP queue from its device driver if the queue is in use. The callback
>> will be invoked whenever a change to the AP bus's sysfs apmask or aqmask
>> attributes would result in one or more AP queues being removed from its
>> driver. If the callback responds in the affirmative for any driver
>> queried, the change to the apmask or aqmask will be rejected with a device
>> in use error.
>>
>> For this patch, only non-default drivers will be queried. Currently,
>> there is only one non-default driver, the vfio_ap device driver. The
>> vfio_ap device driver facilitates pass-through of an AP queue to a
>> guest. The idea here is that a guest may be administered by a different
>> sysadmin than the host and we don't want AP resources to unexpectedly
>> disappear from a guest's AP configuration (i.e., adapters and domains
>> assigned to the matrix mdev). This will enforce the proper procedure for
>> removing AP resources intended for guest usage which is to
>> first unassign them from the matrix mdev, then unbind them from the
>> vfio_ap device driver.
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
>> ---
>>   drivers/s390/crypto/ap_bus.c | 148 ++++++++++++++++++++++++++++++++---
>>   drivers/s390/crypto/ap_bus.h |   4 +
>>   2 files changed, 142 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
>> index 485cbfcbf06e..998e61cd86d9 100644
>> --- a/drivers/s390/crypto/ap_bus.c
>> +++ b/drivers/s390/crypto/ap_bus.c
>> @@ -35,6 +35,7 @@
>>   #include <linux/mod_devicetable.h>
>>   #include <linux/debugfs.h>
>>   #include <linux/ctype.h>
>> +#include <linux/module.h>
>>   
>>   #include "ap_bus.h"
>>   #include "ap_debug.h"
>> @@ -893,6 +894,23 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits)
>>   	return 0;
>>   }
>>   
>> +static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits,
>> +			       unsigned long *newmap)
>> +{
>> +	unsigned long size;
>> +	int rc;
>> +
>> +	size = BITS_TO_LONGS(bits)*sizeof(unsigned long);
>> +	if (*str == '+' || *str == '-') {
>> +		memcpy(newmap, bitmap, size);
>> +		rc = modify_bitmap(str, newmap, bits);
>> +	} else {
>> +		memset(newmap, 0, size);
>> +		rc = hex2bitmap(str, newmap, bits);
>> +	}
>> +	return rc;
>> +}
>> +
>>   int ap_parse_mask_str(const char *str,
>>   		      unsigned long *bitmap, int bits,
>>   		      struct mutex *lock)
>> @@ -912,14 +930,7 @@ int ap_parse_mask_str(const char *str,
>>   		kfree(newmap);
>>   		return -ERESTARTSYS;
>>   	}
>> -
>> -	if (*str == '+' || *str == '-') {
>> -		memcpy(newmap, bitmap, size);
>> -		rc = modify_bitmap(str, newmap, bits);
>> -	} else {
>> -		memset(newmap, 0, size);
>> -		rc = hex2bitmap(str, newmap, bits);
>> -	}
>> +	rc = ap_parse_bitmap_str(str, bitmap, bits, newmap);
>>   	if (rc == 0)
>>   		memcpy(bitmap, newmap, size);
>>   	mutex_unlock(lock);
>> @@ -1111,12 +1122,70 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf)
>>   	return rc;
>>   }
>>   
>> +static int __verify_card_reservations(struct device_driver *drv, void *data)
>> +{
>> +	int rc = 0;
>> +	struct ap_driver *ap_drv = to_ap_drv(drv);
>> +	unsigned long *newapm = (unsigned long *)data;
>> +
>> +	/*
>> +	 * No need to verify whether the driver is using the queues if it is the
>> +	 * default driver.
>> +	 */
>> +	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
>> +		return 0;
>> +
>> +	/* The non-default driver's module must be loaded */
> Can you please update this comment? It should be something like
> /* increase the driver's module refcounter to be sure it is not
>     going away when we invoke the callback function. */

Will do.

>
>> +	if (!try_module_get(drv->owner))
>> +		return 0;
>> +
>> +	if (ap_drv->in_use)
>> +		if (ap_drv->in_use(newapm, ap_perms.aqm))
>> +			rc = -EBUSY;
>> +
> And here: /* release driver's module */ or simmilar

Okay

>> +	module_put(drv->owner);
>> +
>> +	return rc;
>> +}
>> +
>> +static int apmask_commit(unsigned long *newapm)
>> +{
>> +	int rc;
>> +	unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)];
>> +
>> +	/*
>> +	 * Check if any bits in the apmask have been set which will
>> +	 * result in queues being removed from non-default drivers
>> +	 */
>> +	if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) {
>> +		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
>> +				      __verify_card_reservations);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	memcpy(ap_perms.apm, newapm, APMASKSIZE);
>> +
>> +	return 0;
>> +}
>> +
>>   static ssize_t apmask_store(struct bus_type *bus, const char *buf,
>>   			    size_t count)
>>   {
>>   	int rc;
>> +	DECLARE_BITMAP(newapm, AP_DEVICES);
>> +
>> +	if (mutex_lock_interruptible(&ap_perms_mutex))
>> +		return -ERESTARTSYS;
>> +
>> +	rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm);
>> +	if (rc)
>> +		goto done;
>>   
>> -	rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex);
>> +	rc = apmask_commit(newapm);
>> +
>> +done:
>> +	mutex_unlock(&ap_perms_mutex);
>>   	if (rc)
>>   		return rc;
>>   
>> @@ -1142,12 +1211,71 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf)
>>   	return rc;
>>   }
>>   
>> +static int __verify_queue_reservations(struct device_driver *drv, void *data)
>> +{
>> +	int rc = 0;
>> +	struct ap_driver *ap_drv = to_ap_drv(drv);
>> +	unsigned long *newaqm = (unsigned long *)data;
>> +
>> +	/*
>> +	 * If the reserved bits do not identify queues reserved for use by the
>> +	 * non-default driver, there is no need to verify the driver is using
>> +	 * the queues.
>> +	 */
>> +	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
>> +		return 0;
>> +
>> +	/* The non-default driver's module must be loaded */
> Same here.

Okay

>> +	if (!try_module_get(drv->owner))
>> +		return 0;
>> +
>> +	if (ap_drv->in_use)
>> +		if (ap_drv->in_use(ap_perms.apm, newaqm))
>> +			rc = -EBUSY;
>> +
> and here

Okay

>> +	module_put(drv->owner);
>> +
>> +	return rc;
>> +}
>> +
>> +static int aqmask_commit(unsigned long *newaqm)
>> +{
>> +	int rc;
>> +	unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)];
>> +
>> +	/*
>> +	 * Check if any bits in the aqmask have been set which will
>> +	 * result in queues being removed from non-default drivers
>> +	 */
>> +	if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) {
>> +		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
>> +				      __verify_queue_reservations);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	memcpy(ap_perms.aqm, newaqm, AQMASKSIZE);
>> +
>> +	return 0;
>> +}
>> +
>>   static ssize_t aqmask_store(struct bus_type *bus, const char *buf,
>>   			    size_t count)
>>   {
>>   	int rc;
>> +	DECLARE_BITMAP(newaqm, AP_DOMAINS);
>>   
>> -	rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex);
>> +	if (mutex_lock_interruptible(&ap_perms_mutex))
>> +		return -ERESTARTSYS;
>> +
>> +	rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm);
>> +	if (rc)
>> +		goto done;
>> +
>> +	rc = aqmask_commit(newaqm);
>> +
>> +done:
>> +	mutex_unlock(&ap_perms_mutex);
>>   	if (rc)
>>   		return rc;
>>   
>> diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
>> index 5029b80132aa..6ce154d924d3 100644
>> --- a/drivers/s390/crypto/ap_bus.h
>> +++ b/drivers/s390/crypto/ap_bus.h
>> @@ -145,6 +145,7 @@ struct ap_driver {
>>   
>>   	int (*probe)(struct ap_device *);
>>   	void (*remove)(struct ap_device *);
>> +	bool (*in_use)(unsigned long *apm, unsigned long *aqm);
>>   };
>>   
>>   #define to_ap_drv(x) container_of((x), struct ap_driver, driver)
>> @@ -293,6 +294,9 @@ void ap_queue_init_state(struct ap_queue *aq);
>>   struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type,
>>   			       int comp_device_type, unsigned int functions);
>>   
>> +#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long))
>> +#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long))
>> +
>>   struct ap_perms {
>>   	unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)];
>>   	unsigned long apm[BITS_TO_LONGS(AP_DEVICES)];
> I still don't like this code. That's because of what it is doing - not because of the code quality.
> And Halil, you are right. It is adding more pressure to the mutex used for locking the apmask
> and aqmask stuff (and the zcrypt multiple device drivers support code also).
> I am very concerned about the in_use callback which is called with the ap_perms_mutex
> held AND during bus_for_each_drv (so holding the overall AP BUS mutex) and then diving
> into the vfio_ap ... with yet another mutex to protect the vfio structs.
> Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>

Thank you for your review. Maybe you ought to bring these concerns up with
our crypto architect. Halil came up with a solution for the potential 
deadlock
situation. We will be using the mutex_trylock() function in our sysfs 
assignment
interfaces which make the call to the AP bus to check permissions (which 
also
locks ap_perms). If the mutex_trylock() fails, we return from the assignment
function with -EBUSY. This should resolve that potential deadlock issue.
Halil Pasic Nov. 14, 2020, midnight UTC | #4
On Fri, 13 Nov 2020 16:30:31 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> We will be using the mutex_trylock() function in our sysfs 
> assignment
> interfaces which make the call to the AP bus to check permissions (which 
> also
> locks ap_perms). If the mutex_trylock() fails, we return from the assignment
> function with -EBUSY. This should resolve that potential deadlock issue.

It resolves the deadlock issue only if in_use() is also doing
mutex_trylock(), but the if in_use doesn't take the lock it
needs to back off (and so does it's client code) i.e. a boolean as
return value won't do.

Regards,
Halil
Anthony Krowiak Nov. 16, 2020, 4:23 p.m. UTC | #5
On 11/13/20 7:00 PM, Halil Pasic wrote:
> On Fri, 13 Nov 2020 16:30:31 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>> We will be using the mutex_trylock() function in our sysfs
>> assignment
>> interfaces which make the call to the AP bus to check permissions (which
>> also
>> locks ap_perms). If the mutex_trylock() fails, we return from the assignment
>> function with -EBUSY. This should resolve that potential deadlock issue.
> It resolves the deadlock issue only if in_use() is also doing
> mutex_trylock(), but the if in_use doesn't take the lock it
> needs to back off (and so does it's client code) i.e. a boolean as
> return value won't do.

Makes sense. I'll change the in_use callback to return an int and use
mutex_trylock() for the vfio_ap_mdev_in_use() function. If the lock
can not be obtained, the function will return -EBUSY.

>
> Regards,
> Halil
diff mbox series

Patch

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 485cbfcbf06e..998e61cd86d9 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -35,6 +35,7 @@ 
 #include <linux/mod_devicetable.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
+#include <linux/module.h>
 
 #include "ap_bus.h"
 #include "ap_debug.h"
@@ -893,6 +894,23 @@  static int modify_bitmap(const char *str, unsigned long *bitmap, int bits)
 	return 0;
 }
 
+static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits,
+			       unsigned long *newmap)
+{
+	unsigned long size;
+	int rc;
+
+	size = BITS_TO_LONGS(bits)*sizeof(unsigned long);
+	if (*str == '+' || *str == '-') {
+		memcpy(newmap, bitmap, size);
+		rc = modify_bitmap(str, newmap, bits);
+	} else {
+		memset(newmap, 0, size);
+		rc = hex2bitmap(str, newmap, bits);
+	}
+	return rc;
+}
+
 int ap_parse_mask_str(const char *str,
 		      unsigned long *bitmap, int bits,
 		      struct mutex *lock)
@@ -912,14 +930,7 @@  int ap_parse_mask_str(const char *str,
 		kfree(newmap);
 		return -ERESTARTSYS;
 	}
-
-	if (*str == '+' || *str == '-') {
-		memcpy(newmap, bitmap, size);
-		rc = modify_bitmap(str, newmap, bits);
-	} else {
-		memset(newmap, 0, size);
-		rc = hex2bitmap(str, newmap, bits);
-	}
+	rc = ap_parse_bitmap_str(str, bitmap, bits, newmap);
 	if (rc == 0)
 		memcpy(bitmap, newmap, size);
 	mutex_unlock(lock);
@@ -1111,12 +1122,70 @@  static ssize_t apmask_show(struct bus_type *bus, char *buf)
 	return rc;
 }
 
+static int __verify_card_reservations(struct device_driver *drv, void *data)
+{
+	int rc = 0;
+	struct ap_driver *ap_drv = to_ap_drv(drv);
+	unsigned long *newapm = (unsigned long *)data;
+
+	/*
+	 * No need to verify whether the driver is using the queues if it is the
+	 * default driver.
+	 */
+	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
+		return 0;
+
+	/* The non-default driver's module must be loaded */
+	if (!try_module_get(drv->owner))
+		return 0;
+
+	if (ap_drv->in_use)
+		if (ap_drv->in_use(newapm, ap_perms.aqm))
+			rc = -EBUSY;
+
+	module_put(drv->owner);
+
+	return rc;
+}
+
+static int apmask_commit(unsigned long *newapm)
+{
+	int rc;
+	unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)];
+
+	/*
+	 * Check if any bits in the apmask have been set which will
+	 * result in queues being removed from non-default drivers
+	 */
+	if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) {
+		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
+				      __verify_card_reservations);
+		if (rc)
+			return rc;
+	}
+
+	memcpy(ap_perms.apm, newapm, APMASKSIZE);
+
+	return 0;
+}
+
 static ssize_t apmask_store(struct bus_type *bus, const char *buf,
 			    size_t count)
 {
 	int rc;
+	DECLARE_BITMAP(newapm, AP_DEVICES);
+
+	if (mutex_lock_interruptible(&ap_perms_mutex))
+		return -ERESTARTSYS;
+
+	rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm);
+	if (rc)
+		goto done;
 
-	rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex);
+	rc = apmask_commit(newapm);
+
+done:
+	mutex_unlock(&ap_perms_mutex);
 	if (rc)
 		return rc;
 
@@ -1142,12 +1211,71 @@  static ssize_t aqmask_show(struct bus_type *bus, char *buf)
 	return rc;
 }
 
+static int __verify_queue_reservations(struct device_driver *drv, void *data)
+{
+	int rc = 0;
+	struct ap_driver *ap_drv = to_ap_drv(drv);
+	unsigned long *newaqm = (unsigned long *)data;
+
+	/*
+	 * If the reserved bits do not identify queues reserved for use by the
+	 * non-default driver, there is no need to verify the driver is using
+	 * the queues.
+	 */
+	if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
+		return 0;
+
+	/* The non-default driver's module must be loaded */
+	if (!try_module_get(drv->owner))
+		return 0;
+
+	if (ap_drv->in_use)
+		if (ap_drv->in_use(ap_perms.apm, newaqm))
+			rc = -EBUSY;
+
+	module_put(drv->owner);
+
+	return rc;
+}
+
+static int aqmask_commit(unsigned long *newaqm)
+{
+	int rc;
+	unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)];
+
+	/*
+	 * Check if any bits in the aqmask have been set which will
+	 * result in queues being removed from non-default drivers
+	 */
+	if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) {
+		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
+				      __verify_queue_reservations);
+		if (rc)
+			return rc;
+	}
+
+	memcpy(ap_perms.aqm, newaqm, AQMASKSIZE);
+
+	return 0;
+}
+
 static ssize_t aqmask_store(struct bus_type *bus, const char *buf,
 			    size_t count)
 {
 	int rc;
+	DECLARE_BITMAP(newaqm, AP_DOMAINS);
 
-	rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex);
+	if (mutex_lock_interruptible(&ap_perms_mutex))
+		return -ERESTARTSYS;
+
+	rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm);
+	if (rc)
+		goto done;
+
+	rc = aqmask_commit(newaqm);
+
+done:
+	mutex_unlock(&ap_perms_mutex);
 	if (rc)
 		return rc;
 
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 5029b80132aa..6ce154d924d3 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -145,6 +145,7 @@  struct ap_driver {
 
 	int (*probe)(struct ap_device *);
 	void (*remove)(struct ap_device *);
+	bool (*in_use)(unsigned long *apm, unsigned long *aqm);
 };
 
 #define to_ap_drv(x) container_of((x), struct ap_driver, driver)
@@ -293,6 +294,9 @@  void ap_queue_init_state(struct ap_queue *aq);
 struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type,
 			       int comp_device_type, unsigned int functions);
 
+#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long))
+#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long))
+
 struct ap_perms {
 	unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)];
 	unsigned long apm[BITS_TO_LONGS(AP_DEVICES)];