diff mbox series

[v5,6/7] s390: vfio-ap: add logging to vfio_ap driver

Message ID 1564612877-7598-7-git-send-email-akrowiak@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390: vfio-ap: dynamic configuration support | expand

Commit Message

Anthony Krowiak July 31, 2019, 10:41 p.m. UTC
Added two DBF log files for logging events and errors; one for the vfio_ap
driver, and one for each matrix mediated device.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_drv.c     |  34 +++++++
 drivers/s390/crypto/vfio_ap_ops.c     | 187 ++++++++++++++++++++++++++++++----
 drivers/s390/crypto/vfio_ap_private.h |  20 ++++
 3 files changed, 223 insertions(+), 18 deletions(-)

Comments

Cornelia Huck Aug. 12, 2019, 10:35 a.m. UTC | #1
On Wed, 31 Jul 2019 18:41:16 -0400
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> Added two DBF log files for logging events and errors; one for the vfio_ap
> driver, and one for each matrix mediated device.

While the s390dbf is useful (especially for accessing the information
in dumps), trace points are a more standard interface. Have you
evaluated that as well? (We probably should add something to the
vfio/mdev code as well; tracing there is a good complement to tracing
in vendor drivers.)

Also, isn't this independent of the rest of the series?

> 
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> ---
>  drivers/s390/crypto/vfio_ap_drv.c     |  34 +++++++
>  drivers/s390/crypto/vfio_ap_ops.c     | 187 ++++++++++++++++++++++++++++++----
>  drivers/s390/crypto/vfio_ap_private.h |  20 ++++
>  3 files changed, 223 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
> index d8da520ae1fa..04a77246c22a 100644
> --- a/drivers/s390/crypto/vfio_ap_drv.c
> +++ b/drivers/s390/crypto/vfio_ap_drv.c
> @@ -22,6 +22,10 @@ MODULE_AUTHOR("IBM Corporation");
>  MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
>  MODULE_LICENSE("GPL v2");
>  
> +uint dbglvl = 3;
> +module_param(dbglvl, uint, 0444);
> +MODULE_PARM_DESC(dbglvl, "VFIO_AP driver debug level.");

More the default debug level, isn't it? (IIRC, you can change the level
of the s390dbfs dynamically.)

> +
>  static struct ap_driver vfio_ap_drv;
>  
>  struct ap_matrix_dev *matrix_dev;
> @@ -158,6 +162,21 @@ static void vfio_ap_matrix_dev_destroy(void)
>  	root_device_unregister(root_device);
>  }
>  
> +static void vfio_ap_log_queues_in_use(struct ap_matrix_mdev *matrix_mdev,
> +				  unsigned long *apm, unsigned long *aqm)
> +{
> +	unsigned long apid, apqi;
> +
> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
> +			VFIO_AP_DBF(matrix_dev, DBF_ERR,
> +				    "queue %02lx.%04lx in use by mdev %s\n",
> +				    apid, apqi,
> +				    dev_name(mdev_dev(matrix_mdev->mdev)));

I remember some issues wrt %s in s390dbfs (lifetime); will this dbf
potentially outlive the mdev? Or is the string copied? (Or has s390dbf
been changed to avoid that trap? If so, please disregard my comments.)

> +		}
> +	}
> +}
> +
>  static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>  {
>  	bool in_use = false;
> @@ -179,6 +198,8 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>  			continue;
>  
>  		in_use = true;
> +		vfio_ap_log_queues_in_use(matrix_mdev, apm_intrsctn,
> +					  aqm_intrsctn);
>  	}
>  
>  	mutex_unlock(&matrix_dev->lock);
> @@ -186,6 +207,16 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>  	return in_use;
>  }
>  
> +static int __init vfio_ap_debug_init(void)
> +{
> +	matrix_dev->dbf = debug_register(VFIO_AP_DRV_NAME, 1, 1,
> +					 DBF_SPRINTF_MAX_ARGS * sizeof(long));

It seems that debug_register() can possibly fail? (Unlikely, but we
should check.)

> +	debug_register_view(matrix_dev->dbf, &debug_sprintf_view);
> +	debug_set_level(matrix_dev->dbf, dbglvl);
> +
> +	return 0;
> +}
> +
>  static int __init vfio_ap_init(void)
>  {
>  	int ret;
> @@ -219,6 +250,8 @@ static int __init vfio_ap_init(void)
>  		return ret;
>  	}
>  
> +	vfio_ap_debug_init();
> +
>  	return 0;
>  }
>  
> @@ -227,6 +260,7 @@ static void __exit vfio_ap_exit(void)
>  	vfio_ap_mdev_unregister();
>  	ap_driver_unregister(&vfio_ap_drv);
>  	vfio_ap_matrix_dev_destroy();
> +	debug_unregister(matrix_dev->dbf);
>  }
>  
>  module_init(vfio_ap_init);
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 0e748819abb6..1aa18eba43d0 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -167,17 +167,23 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
>  		case AP_RESPONSE_INVALID_ADDRESS:
>  		default:
>  			/* All cases in default means AP not operational */
> -			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
> -				  status.response_code);
>  			goto end_free;
>  		}
>  	} while (retries--);
>  
> -	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
> -		  status.response_code);
>  end_free:
>  	vfio_ap_free_aqic_resources(q);
>  	q->matrix_mdev = NULL;
> +	if (status.response_code) {

If I read the code correctly, we consider AP_RESPONSE_OTHERWISE_CHANGED
a success as well, don't we? (Not sure what that means, though.)

> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
> +			 "IRQ disable failed for queue %02x.%04x: status response code=%u\n",
> +			 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
> +			 status.response_code);
> +	} else {
> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
> +				 "IRQ disabled for queue %02x.%04x\n",
> +				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
> +	}
>  	return status;
>  }
>  

(...)

> @@ -321,8 +340,29 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
>  	matrix->adm_max = info->apxa ? info->Nd : 15;
>  }
>  
> +static int vfio_ap_mdev_debug_init(struct ap_matrix_mdev *matrix_mdev)
> +{
> +	int ret;
> +
> +	matrix_mdev->dbf = debug_register(dev_name(mdev_dev(matrix_mdev->mdev)),
> +					  1, 1,
> +					  DBF_SPRINTF_MAX_ARGS * sizeof(long));
> +
> +	if (!matrix_mdev->dbf)
> +		return -ENOMEM;

Ok, here we do check for the result of debug_register().

> +
> +	ret = debug_register_view(matrix_mdev->dbf, &debug_sprintf_view);
> +	if (ret)
> +		return ret;

Don't we need to clean up ->dbf in the failure case?

Also, we probably need to check this as well for the other dbf.

> +
> +	debug_set_level(matrix_mdev->dbf, dbglvl);
> +
> +	return 0;
> +}
> +
>  static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
>  {
> +	int ret;
>  	struct ap_matrix_mdev *matrix_mdev;
>  
>  	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
> @@ -335,6 +375,13 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
>  	}
>  
>  	matrix_mdev->mdev = mdev;
> +
> +	ret = vfio_ap_mdev_debug_init(matrix_mdev);
> +	if (ret) {
> +		kfree(matrix_mdev);
> +		return ret;

You also should bump available_instances again in the failure case.

> +	}
> +
>  	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
>  	mdev_set_drvdata(mdev, matrix_mdev);
>  	matrix_mdev->pqap_hook.hook = handle_pqap;
> @@ -350,14 +397,19 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
>  {
>  	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>  
> -	if (matrix_mdev->kvm)
> +	if (matrix_mdev->kvm) {
> +		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
> +				 "remove rejected, mdev in use by %s",
> +				 matrix_mdev->kvm->debugfs_dentry->d_iname);

Can this be a problem when the kvm goes away (and the d_iname is gone)?

Regardless of s390dbf implementation details, is d_iname even valid in
all cases (no debugfs)?

>  		return -EBUSY;
> +	}
>  
>  	mutex_lock(&matrix_dev->lock);
>  	vfio_ap_mdev_reset_queues(mdev);
>  	list_del(&matrix_mdev->node);
>  	mutex_unlock(&matrix_dev->lock);
>  
> +	debug_unregister(matrix_mdev->dbf);
>  	kfree(matrix_mdev);
>  	mdev_set_drvdata(mdev, NULL);
>  	atomic_inc(&matrix_dev->available_instances);
> @@ -406,6 +458,22 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = {
>  	NULL,
>  };
>  
> +static void vfio_ap_mdev_log_sharing_error(struct ap_matrix_mdev *logdev,
> +					   const char *assigned_to,
> +					   unsigned long *apm,
> +					   unsigned long *aqm)
> +{
> +	unsigned long apid, apqi;
> +
> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
> +					 "queue %02lx.%04lx already assigned to %s\n",

I'm also not 100% sure about string lifetimes here.

> +					 apid, apqi, assigned_to);
> +		}
> +	}
> +}
> +
>  /**
>   * vfio_ap_mdev_verify_no_sharing
>   *
> @@ -448,22 +516,39 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
>  		if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
>  			continue;
>  
> +		vfio_ap_mdev_log_sharing_error(matrix_mdev,
> +					       dev_name(mdev_dev(lstdev->mdev)),
> +					       apm, aqm);
> +
>  		return -EADDRINUSE;
>  	}
>  
>  	return 0;
>  }
>  
> -static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev,
> +static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *logdev,
>  				       unsigned long *apm, unsigned long *aqm)
>  {
> -	int ret;
> +	int ret = 0;
> +	unsigned long apid, apqi;
> +
> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> +		for_each_set_bit_inv(apqi, aqm, AP_DEVICES) {
> +			if (!ap_owned_by_def_drv(apid, apqi))
> +				continue;
> +
> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
> +					 "queue %02lx.%04lx owned by zcrypt\n",

s/zcrypt/default driver/ ?

> +					 apid, apqi);
> +
> +			ret = -EADDRNOTAVAIL;
> +		}
> +	}
>  
> -	ret = ap_apqn_in_matrix_owned_by_def_drv(apm, aqm);
>  	if (ret)
> -		return (ret == 1) ? -EADDRNOTAVAIL : ret;
> +		return ret;
>  
> -	return vfio_ap_mdev_verify_no_sharing(matrix_mdev, apm, aqm);
> +	return vfio_ap_mdev_verify_no_sharing(logdev, apm, aqm);
>  }
>  
>  static void vfio_ap_mdev_update_crycb(struct ap_matrix_mdev *matrix_mdev)

(...)

> @@ -1013,9 +1132,10 @@ static void vfio_ap_mdev_wait_for_qempty(ap_qid_t qid)
>  			msleep(20);
>  			break;
>  		default:
> -			pr_warn("%s: tapq response %02x waiting for queue %04x.%02x empty\n",
> -				__func__, status.response_code,
> -				AP_QID_CARD(qid), AP_QID_QUEUE(qid));
> +			WARN_ONCE(1,
> +				  "%s: tapq response %02x waiting for queue %04x.%02x empty\n",
> +				  __func__, status.response_code,
> +				  AP_QID_CARD(qid), AP_QID_QUEUE(qid));

Why this change?

>  			return;
>  		}
>  	} while (--retry);

(...)

> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index 5cc3c2ebf151..f717e43e10cf 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -24,6 +24,21 @@
>  #define VFIO_AP_MODULE_NAME "vfio_ap"
>  #define VFIO_AP_DRV_NAME "vfio_ap"
>  
> +#define DBF_ERR		3	/* error conditions   */
> +#define DBF_WARN	4	/* warning conditions */
> +#define DBF_INFO	5	/* informational      */
> +#define DBF_DEBUG	6	/* for debugging only */

Can you reuse the LOGLEVEL_* constants instead of rolling your own?

> +
> +#define DBF_SPRINTF_MAX_ARGS 5
> +
> +#define VFIO_AP_DBF(d_matrix_dev, ...) \
> +	debug_sprintf_event(d_matrix_dev->dbf, ##__VA_ARGS__)
> +
> +#define VFIO_AP_MDEV_DBF(d_matrix_mdev, ...) \
> +	debug_sprintf_event(d_matrix_mdev->dbf, ##__VA_ARGS__)
> +
> +extern uint dbglvl;
> +
>  /**
>   * ap_matrix_dev - the AP matrix device structure
>   * @device:	generic device structure associated with the AP matrix device
> @@ -43,6 +58,7 @@ struct ap_matrix_dev {
>  	struct list_head mdev_list;
>  	struct mutex lock;
>  	struct ap_driver  *vfio_ap_drv;
> +	debug_info_t *dbf;
>  };
>  
>  extern struct ap_matrix_dev *matrix_dev;
> @@ -77,6 +93,9 @@ struct ap_matrix {
>   * @group_notifier: notifier block used for specifying callback function for
>   *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
>   * @kvm:	the struct holding guest's state
> + * @pqap_hook:	handler for PQAP instruction
> + * @mdev:	the matrix mediated device

Should updating the description for these two go into a trivial
separate patch?

> + * @dbf:	the debug info log
>   */
>  struct ap_matrix_mdev {
>  	struct list_head node;
> @@ -86,6 +105,7 @@ struct ap_matrix_mdev {
>  	struct kvm *kvm;
>  	struct kvm_s390_module_hook pqap_hook;
>  	struct mdev_device *mdev;
> +	debug_info_t *dbf;
>  };
>  
>  extern int vfio_ap_mdev_register(void);
Anthony Krowiak Aug. 12, 2019, 8:34 p.m. UTC | #2
On 8/12/19 6:35 AM, Cornelia Huck wrote:
> On Wed, 31 Jul 2019 18:41:16 -0400
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> 
>> Added two DBF log files for logging events and errors; one for the vfio_ap
>> driver, and one for each matrix mediated device.
> 
> While the s390dbf is useful (especially for accessing the information
> in dumps), trace points are a more standard interface. Have you
> evaluated that as well? (We probably should add something to the
> vfio/mdev code as well; tracing there is a good complement to tracing
> in vendor drivers.)

I assume you are talking about the TRACE() macro here? I have not
evaluated that. I chose s390dbf for the sole reason that the
AP bus (drivers/s390/crypto/ap_bus.c) uses s390dbf. I can look into
using trace points. The genesis of this patch was in response to
comments you made in the previous series (v4). Recall that assignment
of an adapter or domain to a matrix mdev will fail if the APQN(s)
resulting from the assignment are either owned by zcrypt or another
matrix mdev. I said I'd provide a means for the admin to determine
why the assignment failed.

I will look into using trace points, but before I expend the effort
to make such a change, what would be the advantage of trace points
over s390dbf?

> 
> Also, isn't this independent of the rest of the series?

I guess that depends upon your definition of independent. Yes, this
patch could be posted as an entity unto itself, but then the rest of
the series would have to pre-req it given much of the logging is
done in code that has been modified by the series. Is there a
good reason to make this independent?

> 
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
>> ---
>>   drivers/s390/crypto/vfio_ap_drv.c     |  34 +++++++
>>   drivers/s390/crypto/vfio_ap_ops.c     | 187 ++++++++++++++++++++++++++++++----
>>   drivers/s390/crypto/vfio_ap_private.h |  20 ++++
>>   3 files changed, 223 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
>> index d8da520ae1fa..04a77246c22a 100644
>> --- a/drivers/s390/crypto/vfio_ap_drv.c
>> +++ b/drivers/s390/crypto/vfio_ap_drv.c
>> @@ -22,6 +22,10 @@ MODULE_AUTHOR("IBM Corporation");
>>   MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
>>   MODULE_LICENSE("GPL v2");
>>   
>> +uint dbglvl = 3;
>> +module_param(dbglvl, uint, 0444);
>> +MODULE_PARM_DESC(dbglvl, "VFIO_AP driver debug level.");
> 
> More the default debug level, isn't it? (IIRC, you can change the level
> of the s390dbfs dynamically.)

The default debug level is 3. This allows the admin to change the debug
level at boot time so as not to miss trace events that might occur prior
to using the sysfs 'level' file to change the debug level.

For the record, I had a suggestion from Harald to change the name to
vfio_dbglvl or something of that nature to avoid namespace collisions.

> 
>> +
>>   static struct ap_driver vfio_ap_drv;
>>   
>>   struct ap_matrix_dev *matrix_dev;
>> @@ -158,6 +162,21 @@ static void vfio_ap_matrix_dev_destroy(void)
>>   	root_device_unregister(root_device);
>>   }
>>   
>> +static void vfio_ap_log_queues_in_use(struct ap_matrix_mdev *matrix_mdev,
>> +				  unsigned long *apm, unsigned long *aqm)
>> +{
>> +	unsigned long apid, apqi;
>> +
>> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
>> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
>> +			VFIO_AP_DBF(matrix_dev, DBF_ERR,
>> +				    "queue %02lx.%04lx in use by mdev %s\n",
>> +				    apid, apqi,
>> +				    dev_name(mdev_dev(matrix_mdev->mdev)));
> 
> I remember some issues wrt %s in s390dbfs (lifetime); will this dbf
> potentially outlive the mdev? Or is the string copied? (Or has s390dbf
> been changed to avoid that trap? If so, please disregard my comments.)

If I understand your question, then this should not be a problem. The
lifespan of the mdev dbf files coincides with the lifespan of the mdev.
The dbf for the matrix mdev is registered when the mdev is created
and unregistered when the mdev is removed. Likewise, the vfio_ap dbf
is created when the module is initialized and unregistered when the
module is exited.

> 
>> +		}
>> +	}
>> +}
>> +
>>   static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>>   {
>>   	bool in_use = false;
>> @@ -179,6 +198,8 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>>   			continue;
>>   
>>   		in_use = true;
>> +		vfio_ap_log_queues_in_use(matrix_mdev, apm_intrsctn,
>> +					  aqm_intrsctn);
>>   	}
>>   
>>   	mutex_unlock(&matrix_dev->lock);
>> @@ -186,6 +207,16 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
>>   	return in_use;
>>   }
>>   
>> +static int __init vfio_ap_debug_init(void)
>> +{
>> +	matrix_dev->dbf = debug_register(VFIO_AP_DRV_NAME, 1, 1,
>> +					 DBF_SPRINTF_MAX_ARGS * sizeof(long));
> 
> It seems that debug_register() can possibly fail? (Unlikely, but we
> should check.)

You are right! There should be a check for matrix_dev->dbf not NULL
like we do for the matrix mdev dbf in vfio_ap_mdev_debug_init();

> 
>> +	debug_register_view(matrix_dev->dbf, &debug_sprintf_view);
>> +	debug_set_level(matrix_dev->dbf, dbglvl);
>> +
>> +	return 0;
>> +}
>> +
>>   static int __init vfio_ap_init(void)
>>   {
>>   	int ret;
>> @@ -219,6 +250,8 @@ static int __init vfio_ap_init(void)
>>   		return ret;
>>   	}
>>   
>> +	vfio_ap_debug_init();
>> +
>>   	return 0;
>>   }
>>   
>> @@ -227,6 +260,7 @@ static void __exit vfio_ap_exit(void)
>>   	vfio_ap_mdev_unregister();
>>   	ap_driver_unregister(&vfio_ap_drv);
>>   	vfio_ap_matrix_dev_destroy();
>> +	debug_unregister(matrix_dev->dbf);
>>   }
>>   
>>   module_init(vfio_ap_init);
>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>> index 0e748819abb6..1aa18eba43d0 100644
>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>> @@ -167,17 +167,23 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
>>   		case AP_RESPONSE_INVALID_ADDRESS:
>>   		default:
>>   			/* All cases in default means AP not operational */
>> -			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
>> -				  status.response_code);
>>   			goto end_free;
>>   		}
>>   	} while (retries--);
>>   
>> -	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
>> -		  status.response_code);
>>   end_free:
>>   	vfio_ap_free_aqic_resources(q);
>>   	q->matrix_mdev = NULL;
>> +	if (status.response_code) {
> 
> If I read the code correctly, we consider AP_RESPONSE_OTHERWISE_CHANGED
> a success as well, don't we? (Not sure what that means, though.)

It indicates that IRQ enable/disable has already been set as requested,
or a the async portion of a previous PQAP(AQIC) has not yet completed.
This just a warning, not an error.

> 
>> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
>> +			 "IRQ disable failed for queue %02x.%04x: status response code=%u\n",
>> +			 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
>> +			 status.response_code);
>> +	} else {
>> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
>> +				 "IRQ disabled for queue %02x.%04x\n",
>> +				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
>> +	}
>>   	return status;
>>   }
>>   
> 
> (...)
> 
>> @@ -321,8 +340,29 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
>>   	matrix->adm_max = info->apxa ? info->Nd : 15;
>>   }
>>   
>> +static int vfio_ap_mdev_debug_init(struct ap_matrix_mdev *matrix_mdev)
>> +{
>> +	int ret;
>> +
>> +	matrix_mdev->dbf = debug_register(dev_name(mdev_dev(matrix_mdev->mdev)),
>> +					  1, 1,
>> +					  DBF_SPRINTF_MAX_ARGS * sizeof(long));
>> +
>> +	if (!matrix_mdev->dbf)
>> +		return -ENOMEM;
> 
> Ok, here we do check for the result of debug_register().

Of course:)

> 
>> +
>> +	ret = debug_register_view(matrix_mdev->dbf, &debug_sprintf_view);
>> +	if (ret)
>> +		return ret;
> 
> Don't we need to clean up ->dbf in the failure case?

What's to clean up if it failed?

> 
> Also, we probably need to check this as well for the other dbf.

Yes.

> 
>> +
>> +	debug_set_level(matrix_mdev->dbf, dbglvl);
>> +
>> +	return 0;
>> +}
>> +
>>   static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
>>   {
>> +	int ret;
>>   	struct ap_matrix_mdev *matrix_mdev;
>>   
>>   	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
>> @@ -335,6 +375,13 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
>>   	}
>>   
>>   	matrix_mdev->mdev = mdev;
>> +
>> +	ret = vfio_ap_mdev_debug_init(matrix_mdev);
>> +	if (ret) {
>> +		kfree(matrix_mdev);
>> +		return ret;
> 
> You also should bump available_instances again in the failure case.

I agree and will fix this.

> 
>> +	}
>> +
>>   	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
>>   	mdev_set_drvdata(mdev, matrix_mdev);
>>   	matrix_mdev->pqap_hook.hook = handle_pqap;
>> @@ -350,14 +397,19 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
>>   {
>>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>   
>> -	if (matrix_mdev->kvm)
>> +	if (matrix_mdev->kvm) {
>> +		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
>> +				 "remove rejected, mdev in use by %s",
>> +				 matrix_mdev->kvm->debugfs_dentry->d_iname);
> 
> Can this be a problem when the kvm goes away (and the d_iname is gone)?
> 
> Regardless of s390dbf implementation details, is d_iname even valid in
> all cases (no debugfs)?

I don't know the answer to that. Can you point me to a way to get the
name of the guest?

> 
>>   		return -EBUSY;
>> +	}
>>   
>>   	mutex_lock(&matrix_dev->lock);
>>   	vfio_ap_mdev_reset_queues(mdev);
>>   	list_del(&matrix_mdev->node);
>>   	mutex_unlock(&matrix_dev->lock);
>>   
>> +	debug_unregister(matrix_mdev->dbf);
>>   	kfree(matrix_mdev);
>>   	mdev_set_drvdata(mdev, NULL);
>>   	atomic_inc(&matrix_dev->available_instances);
>> @@ -406,6 +458,22 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = {
>>   	NULL,
>>   };
>>   
>> +static void vfio_ap_mdev_log_sharing_error(struct ap_matrix_mdev *logdev,
>> +					   const char *assigned_to,
>> +					   unsigned long *apm,
>> +					   unsigned long *aqm)
>> +{
>> +	unsigned long apid, apqi;
>> +
>> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
>> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
>> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
>> +					 "queue %02lx.%04lx already assigned to %s\n",
> 
> I'm also not 100% sure about string lifetimes here.

I don't understand your concern here, can you elaborate?

> 
>> +					 apid, apqi, assigned_to);
>> +		}
>> +	}
>> +}
>> +
>>   /**
>>    * vfio_ap_mdev_verify_no_sharing
>>    *
>> @@ -448,22 +516,39 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
>>   		if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
>>   			continue;
>>   
>> +		vfio_ap_mdev_log_sharing_error(matrix_mdev,
>> +					       dev_name(mdev_dev(lstdev->mdev)),
>> +					       apm, aqm);
>> +
>>   		return -EADDRINUSE;
>>   	}
>>   
>>   	return 0;
>>   }
>>   
>> -static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev,
>> +static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *logdev,
>>   				       unsigned long *apm, unsigned long *aqm)
>>   {
>> -	int ret;
>> +	int ret = 0;
>> +	unsigned long apid, apqi;
>> +
>> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
>> +		for_each_set_bit_inv(apqi, aqm, AP_DEVICES) {
>> +			if (!ap_owned_by_def_drv(apid, apqi))
>> +				continue;
>> +
>> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
>> +					 "queue %02lx.%04lx owned by zcrypt\n",
> 
> s/zcrypt/default driver/ ?

I don't like default driver because IMHO default driver implies that if
no driver passes the bus match - which matches based on device type -
then it is bound to some default driver. How about:

s/zcrypt/default zcrypt driver/?

> 
>> +					 apid, apqi);
>> +
>> +			ret = -EADDRNOTAVAIL;
>> +		}
>> +	}
>>   
>> -	ret = ap_apqn_in_matrix_owned_by_def_drv(apm, aqm);
>>   	if (ret)
>> -		return (ret == 1) ? -EADDRNOTAVAIL : ret;
>> +		return ret;
>>   
>> -	return vfio_ap_mdev_verify_no_sharing(matrix_mdev, apm, aqm);
>> +	return vfio_ap_mdev_verify_no_sharing(logdev, apm, aqm);
>>   }
>>   
>>   static void vfio_ap_mdev_update_crycb(struct ap_matrix_mdev *matrix_mdev)
> 
> (...)
> 
>> @@ -1013,9 +1132,10 @@ static void vfio_ap_mdev_wait_for_qempty(ap_qid_t qid)
>>   			msleep(20);
>>   			break;
>>   		default:
>> -			pr_warn("%s: tapq response %02x waiting for queue %04x.%02x empty\n",
>> -				__func__, status.response_code,
>> -				AP_QID_CARD(qid), AP_QID_QUEUE(qid));
>> +			WARN_ONCE(1,
>> +				  "%s: tapq response %02x waiting for queue %04x.%02x empty\n",
>> +				  __func__, status.response_code,
>> +				  AP_QID_CARD(qid), AP_QID_QUEUE(qid));
> 
> Why this change?

Given the return following this, it is probably unnecessary. I'll
restore it.

> 
>>   			return;
>>   		}
>>   	} while (--retry);
> 
> (...)
> 
>> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
>> index 5cc3c2ebf151..f717e43e10cf 100644
>> --- a/drivers/s390/crypto/vfio_ap_private.h
>> +++ b/drivers/s390/crypto/vfio_ap_private.h
>> @@ -24,6 +24,21 @@
>>   #define VFIO_AP_MODULE_NAME "vfio_ap"
>>   #define VFIO_AP_DRV_NAME "vfio_ap"
>>   
>> +#define DBF_ERR		3	/* error conditions   */
>> +#define DBF_WARN	4	/* warning conditions */
>> +#define DBF_INFO	5	/* informational      */
>> +#define DBF_DEBUG	6	/* for debugging only */
> 
> Can you reuse the LOGLEVEL_* constants instead of rolling your own?

I assume you are talking about the log levels in linux/kern_levels.h?
Those levels range from -2 to 7. The dbf log levels range from 0 to 6.
It looks like most other drivers that use dbf hard code the levels.
I can do that if you prefer.

> 
>> +
>> +#define DBF_SPRINTF_MAX_ARGS 5
>> +
>> +#define VFIO_AP_DBF(d_matrix_dev, ...) \
>> +	debug_sprintf_event(d_matrix_dev->dbf, ##__VA_ARGS__)
>> +
>> +#define VFIO_AP_MDEV_DBF(d_matrix_mdev, ...) \
>> +	debug_sprintf_event(d_matrix_mdev->dbf, ##__VA_ARGS__)
>> +
>> +extern uint dbglvl;
>> +
>>   /**
>>    * ap_matrix_dev - the AP matrix device structure
>>    * @device:	generic device structure associated with the AP matrix device
>> @@ -43,6 +58,7 @@ struct ap_matrix_dev {
>>   	struct list_head mdev_list;
>>   	struct mutex lock;
>>   	struct ap_driver  *vfio_ap_drv;
>> +	debug_info_t *dbf;
>>   };
>>   
>>   extern struct ap_matrix_dev *matrix_dev;
>> @@ -77,6 +93,9 @@ struct ap_matrix {
>>    * @group_notifier: notifier block used for specifying callback function for
>>    *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
>>    * @kvm:	the struct holding guest's state
>> + * @pqap_hook:	handler for PQAP instruction
>> + * @mdev:	the matrix mediated device
> 
> Should updating the description for these two go into a trivial
> separate patch?

I will if you insist, but what is gained by that?

> 
>> + * @dbf:	the debug info log
>>    */
>>   struct ap_matrix_mdev {
>>   	struct list_head node;
>> @@ -86,6 +105,7 @@ struct ap_matrix_mdev {
>>   	struct kvm *kvm;
>>   	struct kvm_s390_module_hook pqap_hook;
>>   	struct mdev_device *mdev;
>> +	debug_info_t *dbf;
>>   };
>>   
>>   extern int vfio_ap_mdev_register(void);
>
Cornelia Huck Aug. 13, 2019, 10:34 a.m. UTC | #3
On Mon, 12 Aug 2019 16:34:10 -0400
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> On 8/12/19 6:35 AM, Cornelia Huck wrote:
> > On Wed, 31 Jul 2019 18:41:16 -0400
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >   
> >> Added two DBF log files for logging events and errors; one for the vfio_ap
> >> driver, and one for each matrix mediated device.  
> > 
> > While the s390dbf is useful (especially for accessing the information
> > in dumps), trace points are a more standard interface. Have you
> > evaluated that as well? (We probably should add something to the
> > vfio/mdev code as well; tracing there is a good complement to tracing
> > in vendor drivers.)  
> 
> I assume you are talking about the TRACE() macro here? I have not

TRACE_EVENT() and friends (Documentation/trace/tracepoints.rst).

> evaluated that. I chose s390dbf for the sole reason that the
> AP bus (drivers/s390/crypto/ap_bus.c) uses s390dbf. I can look into
> using trace points. The genesis of this patch was in response to
> comments you made in the previous series (v4). Recall that assignment
> of an adapter or domain to a matrix mdev will fail if the APQN(s)
> resulting from the assignment are either owned by zcrypt or another
> matrix mdev. I said I'd provide a means for the admin to determine
> why the assignment failed.

Yes, providing a way to find out what happened is definitely a good
idea.

> 
> I will look into using trace points, but before I expend the effort
> to make such a change, what would be the advantage of trace points
> over s390dbf?

The question of what system to use is not a new one :)

I think it boils down to who will use it, and what other drivers you
want to correlate with. If it's zcrypt, s390dbf is the more obvious
choice; if vfio/mdev (which currently does not have real tracing), it
would be trace points. Doing both might be overkill... your call, as you
probably know better who the consumers are.

> 
> > 
> > Also, isn't this independent of the rest of the series?  
> 
> I guess that depends upon your definition of independent. Yes, this
> patch could be posted as an entity unto itself, but then the rest of
> the series would have to pre-req it given much of the logging is
> done in code that has been modified by the series. Is there a
> good reason to make this independent?

Tracing would be nice regardless of this series; but reworking it to
separate out this patch does not really make sense.

> 
> >   
> >>
> >> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> >> ---
> >>   drivers/s390/crypto/vfio_ap_drv.c     |  34 +++++++
> >>   drivers/s390/crypto/vfio_ap_ops.c     | 187 ++++++++++++++++++++++++++++++----
> >>   drivers/s390/crypto/vfio_ap_private.h |  20 ++++
> >>   3 files changed, 223 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
> >> index d8da520ae1fa..04a77246c22a 100644
> >> --- a/drivers/s390/crypto/vfio_ap_drv.c
> >> +++ b/drivers/s390/crypto/vfio_ap_drv.c
> >> @@ -22,6 +22,10 @@ MODULE_AUTHOR("IBM Corporation");
> >>   MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
> >>   MODULE_LICENSE("GPL v2");
> >>   
> >> +uint dbglvl = 3;
> >> +module_param(dbglvl, uint, 0444);
> >> +MODULE_PARM_DESC(dbglvl, "VFIO_AP driver debug level.");  
> > 
> > More the default debug level, isn't it? (IIRC, you can change the level
> > of the s390dbfs dynamically.)  
> 
> The default debug level is 3. This allows the admin to change the debug
> level at boot time so as not to miss trace events that might occur prior
> to using the sysfs 'level' file to change the debug level.

What I meant is that the description should probably use the term
"default debug level".

> 
> For the record, I had a suggestion from Harald to change the name to
> vfio_dbglvl or something of that nature to avoid namespace collisions.
> 
> >   
> >> +
> >>   static struct ap_driver vfio_ap_drv;
> >>   
> >>   struct ap_matrix_dev *matrix_dev;
> >> @@ -158,6 +162,21 @@ static void vfio_ap_matrix_dev_destroy(void)
> >>   	root_device_unregister(root_device);
> >>   }
> >>   
> >> +static void vfio_ap_log_queues_in_use(struct ap_matrix_mdev *matrix_mdev,
> >> +				  unsigned long *apm, unsigned long *aqm)
> >> +{
> >> +	unsigned long apid, apqi;
> >> +
> >> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> >> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
> >> +			VFIO_AP_DBF(matrix_dev, DBF_ERR,
> >> +				    "queue %02lx.%04lx in use by mdev %s\n",
> >> +				    apid, apqi,
> >> +				    dev_name(mdev_dev(matrix_mdev->mdev)));  
> > 
> > I remember some issues wrt %s in s390dbfs (lifetime); will this dbf
> > potentially outlive the mdev? Or is the string copied? (Or has s390dbf
> > been changed to avoid that trap? If so, please disregard my comments.)  
> 
> If I understand your question, then this should not be a problem. The
> lifespan of the mdev dbf files coincides with the lifespan of the mdev.
> The dbf for the matrix mdev is registered when the mdev is created
> and unregistered when the mdev is removed. Likewise, the vfio_ap dbf
> is created when the module is initialized and unregistered when the
> module is exited.

FTR, I was referring to the following from s390dbf.rst:

"IMPORTANT:                                                                      
  Using "%s" in sprintf event functions is dangerous. You can only              
  use "%s" in the sprintf event functions, if the memory for the passed string  
  is available as long as the debug feature exists. The reason behind this is   
  that due to performance considerations only a pointer to the string is stored 
  in  the debug feature. If you log a string that is freed afterwards, you will 
  get an OOPS when inspecting the debug feature, because then the debug feature 
  will access the already freed memory."

But it isn't a problem in this case.

(...)

> >> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> >> index 0e748819abb6..1aa18eba43d0 100644
> >> --- a/drivers/s390/crypto/vfio_ap_ops.c
> >> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> >> @@ -167,17 +167,23 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
> >>   		case AP_RESPONSE_INVALID_ADDRESS:
> >>   		default:
> >>   			/* All cases in default means AP not operational */
> >> -			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
> >> -				  status.response_code);
> >>   			goto end_free;
> >>   		}
> >>   	} while (retries--);
> >>   
> >> -	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
> >> -		  status.response_code);
> >>   end_free:
> >>   	vfio_ap_free_aqic_resources(q);
> >>   	q->matrix_mdev = NULL;
> >> +	if (status.response_code) {  
> > 
> > If I read the code correctly, we consider AP_RESPONSE_OTHERWISE_CHANGED
> > a success as well, don't we? (Not sure what that means, though.)  
> 
> It indicates that IRQ enable/disable has already been set as requested,
> or a the async portion of a previous PQAP(AQIC) has not yet completed.
> This just a warning, not an error.

Ok.

> 
> >   
> >> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
> >> +			 "IRQ disable failed for queue %02x.%04x: status response code=%u\n",
> >> +			 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
> >> +			 status.response_code);
> >> +	} else {
> >> +		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
> >> +				 "IRQ disabled for queue %02x.%04x\n",
> >> +				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
> >> +	}
> >>   	return status;
> >>   }
> >>     
> > 
> > (...)
> >   
> >> @@ -321,8 +340,29 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
> >>   	matrix->adm_max = info->apxa ? info->Nd : 15;
> >>   }
> >>   
> >> +static int vfio_ap_mdev_debug_init(struct ap_matrix_mdev *matrix_mdev)
> >> +{
> >> +	int ret;
> >> +
> >> +	matrix_mdev->dbf = debug_register(dev_name(mdev_dev(matrix_mdev->mdev)),
> >> +					  1, 1,
> >> +					  DBF_SPRINTF_MAX_ARGS * sizeof(long));
> >> +
> >> +	if (!matrix_mdev->dbf)
> >> +		return -ENOMEM;  
> > 
> > Ok, here we do check for the result of debug_register().  
> 
> Of course:)
> 
> >   
> >> +
> >> +	ret = debug_register_view(matrix_mdev->dbf, &debug_sprintf_view);
> >> +	if (ret)
> >> +		return ret;  
> > 
> > Don't we need to clean up ->dbf in the failure case?  
> 
> What's to clean up if it failed?

debug_register() has allocated some memory, don't we need to free it?

(...)

> >> @@ -350,14 +397,19 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
> >>   {
> >>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> >>   
> >> -	if (matrix_mdev->kvm)
> >> +	if (matrix_mdev->kvm) {
> >> +		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
> >> +				 "remove rejected, mdev in use by %s",
> >> +				 matrix_mdev->kvm->debugfs_dentry->d_iname);  
> > 
> > Can this be a problem when the kvm goes away (and the d_iname is gone)?
> > 
> > Regardless of s390dbf implementation details, is d_iname even valid in
> > all cases (no debugfs)?  
> 
> I don't know the answer to that. Can you point me to a way to get the
> name of the guest?

I just checked, and this will break if debugfs is not configured.

Unfortunately, I don't have a good idea for an alternative way to
identify the owning machine :/

> 
> >   
> >>   		return -EBUSY;
> >> +	}
> >>   
> >>   	mutex_lock(&matrix_dev->lock);
> >>   	vfio_ap_mdev_reset_queues(mdev);
> >>   	list_del(&matrix_mdev->node);
> >>   	mutex_unlock(&matrix_dev->lock);
> >>   
> >> +	debug_unregister(matrix_mdev->dbf);
> >>   	kfree(matrix_mdev);
> >>   	mdev_set_drvdata(mdev, NULL);
> >>   	atomic_inc(&matrix_dev->available_instances);
> >> @@ -406,6 +458,22 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = {
> >>   	NULL,
> >>   };
> >>   
> >> +static void vfio_ap_mdev_log_sharing_error(struct ap_matrix_mdev *logdev,
> >> +					   const char *assigned_to,
> >> +					   unsigned long *apm,
> >> +					   unsigned long *aqm)
> >> +{
> >> +	unsigned long apid, apqi;
> >> +
> >> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> >> +		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
> >> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
> >> +					 "queue %02lx.%04lx already assigned to %s\n",  
> > 
> > I'm also not 100% sure about string lifetimes here.  
> 
> I don't understand your concern here, can you elaborate?

See the excerpt from the documentation for s390dbf I cited above. Are
we sure that assigned_to will stay around at least as long as the debug
feature does?

> 
> >   
> >> +					 apid, apqi, assigned_to);
> >> +		}
> >> +	}
> >> +}
> >> +
> >>   /**
> >>    * vfio_ap_mdev_verify_no_sharing
> >>    *
> >> @@ -448,22 +516,39 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
> >>   		if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
> >>   			continue;
> >>   
> >> +		vfio_ap_mdev_log_sharing_error(matrix_mdev,
> >> +					       dev_name(mdev_dev(lstdev->mdev)),
> >> +					       apm, aqm);
> >> +
> >>   		return -EADDRINUSE;
> >>   	}
> >>   
> >>   	return 0;
> >>   }
> >>   
> >> -static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev,
> >> +static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *logdev,
> >>   				       unsigned long *apm, unsigned long *aqm)
> >>   {
> >> -	int ret;
> >> +	int ret = 0;
> >> +	unsigned long apid, apqi;
> >> +
> >> +	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> >> +		for_each_set_bit_inv(apqi, aqm, AP_DEVICES) {
> >> +			if (!ap_owned_by_def_drv(apid, apqi))
> >> +				continue;
> >> +
> >> +			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
> >> +					 "queue %02lx.%04lx owned by zcrypt\n",  
> > 
> > s/zcrypt/default driver/ ?  
> 
> I don't like default driver because IMHO default driver implies that if
> no driver passes the bus match - which matches based on device type -
> then it is bound to some default driver. How about:
> 
> s/zcrypt/default zcrypt driver/?

Yes, makes sense to me.

(...)

> >> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> >> index 5cc3c2ebf151..f717e43e10cf 100644
> >> --- a/drivers/s390/crypto/vfio_ap_private.h
> >> +++ b/drivers/s390/crypto/vfio_ap_private.h
> >> @@ -24,6 +24,21 @@
> >>   #define VFIO_AP_MODULE_NAME "vfio_ap"
> >>   #define VFIO_AP_DRV_NAME "vfio_ap"
> >>   
> >> +#define DBF_ERR		3	/* error conditions   */
> >> +#define DBF_WARN	4	/* warning conditions */
> >> +#define DBF_INFO	5	/* informational      */
> >> +#define DBF_DEBUG	6	/* for debugging only */  
> > 
> > Can you reuse the LOGLEVEL_* constants instead of rolling your own?  
> 
> I assume you are talking about the log levels in linux/kern_levels.h?
> Those levels range from -2 to 7. The dbf log levels range from 0 to 6.
> It looks like most other drivers that use dbf hard code the levels.
> I can do that if you prefer.

If the other users of the s390dbf use hard-coded values, it's probably
ok to do so here as well. (But that sounds like s390dbf should provide
some definitions for levels... in a separate series.)

> 
> >   
> >> +
> >> +#define DBF_SPRINTF_MAX_ARGS 5
> >> +
> >> +#define VFIO_AP_DBF(d_matrix_dev, ...) \
> >> +	debug_sprintf_event(d_matrix_dev->dbf, ##__VA_ARGS__)
> >> +
> >> +#define VFIO_AP_MDEV_DBF(d_matrix_mdev, ...) \
> >> +	debug_sprintf_event(d_matrix_mdev->dbf, ##__VA_ARGS__)
> >> +
> >> +extern uint dbglvl;
> >> +
> >>   /**
> >>    * ap_matrix_dev - the AP matrix device structure
> >>    * @device:	generic device structure associated with the AP matrix device
> >> @@ -43,6 +58,7 @@ struct ap_matrix_dev {
> >>   	struct list_head mdev_list;
> >>   	struct mutex lock;
> >>   	struct ap_driver  *vfio_ap_drv;
> >> +	debug_info_t *dbf;
> >>   };
> >>   
> >>   extern struct ap_matrix_dev *matrix_dev;
> >> @@ -77,6 +93,9 @@ struct ap_matrix {
> >>    * @group_notifier: notifier block used for specifying callback function for
> >>    *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
> >>    * @kvm:	the struct holding guest's state
> >> + * @pqap_hook:	handler for PQAP instruction
> >> + * @mdev:	the matrix mediated device  
> > 
> > Should updating the description for these two go into a trivial
> > separate patch?  
> 
> I will if you insist, but what is gained by that?

I had to look twice to see that those two fields have not been
introduced by this patch :)

If this series will make the next merge window anyway, no need to spend
effort to split this out, though.

> 
> >   
> >> + * @dbf:	the debug info log
> >>    */
> >>   struct ap_matrix_mdev {
> >>   	struct list_head node;
> >> @@ -86,6 +105,7 @@ struct ap_matrix_mdev {
> >>   	struct kvm *kvm;
> >>   	struct kvm_s390_module_hook pqap_hook;
> >>   	struct mdev_device *mdev;
> >> +	debug_info_t *dbf;
> >>   };
> >>   
> >>   extern int vfio_ap_mdev_register(void);  
> >   
>
diff mbox series

Patch

diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index d8da520ae1fa..04a77246c22a 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -22,6 +22,10 @@  MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
 MODULE_LICENSE("GPL v2");
 
+uint dbglvl = 3;
+module_param(dbglvl, uint, 0444);
+MODULE_PARM_DESC(dbglvl, "VFIO_AP driver debug level.");
+
 static struct ap_driver vfio_ap_drv;
 
 struct ap_matrix_dev *matrix_dev;
@@ -158,6 +162,21 @@  static void vfio_ap_matrix_dev_destroy(void)
 	root_device_unregister(root_device);
 }
 
+static void vfio_ap_log_queues_in_use(struct ap_matrix_mdev *matrix_mdev,
+				  unsigned long *apm, unsigned long *aqm)
+{
+	unsigned long apid, apqi;
+
+	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+			VFIO_AP_DBF(matrix_dev, DBF_ERR,
+				    "queue %02lx.%04lx in use by mdev %s\n",
+				    apid, apqi,
+				    dev_name(mdev_dev(matrix_mdev->mdev)));
+		}
+	}
+}
+
 static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
 {
 	bool in_use = false;
@@ -179,6 +198,8 @@  static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
 			continue;
 
 		in_use = true;
+		vfio_ap_log_queues_in_use(matrix_mdev, apm_intrsctn,
+					  aqm_intrsctn);
 	}
 
 	mutex_unlock(&matrix_dev->lock);
@@ -186,6 +207,16 @@  static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
 	return in_use;
 }
 
+static int __init vfio_ap_debug_init(void)
+{
+	matrix_dev->dbf = debug_register(VFIO_AP_DRV_NAME, 1, 1,
+					 DBF_SPRINTF_MAX_ARGS * sizeof(long));
+	debug_register_view(matrix_dev->dbf, &debug_sprintf_view);
+	debug_set_level(matrix_dev->dbf, dbglvl);
+
+	return 0;
+}
+
 static int __init vfio_ap_init(void)
 {
 	int ret;
@@ -219,6 +250,8 @@  static int __init vfio_ap_init(void)
 		return ret;
 	}
 
+	vfio_ap_debug_init();
+
 	return 0;
 }
 
@@ -227,6 +260,7 @@  static void __exit vfio_ap_exit(void)
 	vfio_ap_mdev_unregister();
 	ap_driver_unregister(&vfio_ap_drv);
 	vfio_ap_matrix_dev_destroy();
+	debug_unregister(matrix_dev->dbf);
 }
 
 module_init(vfio_ap_init);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0e748819abb6..1aa18eba43d0 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -167,17 +167,23 @@  static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
 		case AP_RESPONSE_INVALID_ADDRESS:
 		default:
 			/* All cases in default means AP not operational */
-			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
-				  status.response_code);
 			goto end_free;
 		}
 	} while (retries--);
 
-	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
-		  status.response_code);
 end_free:
 	vfio_ap_free_aqic_resources(q);
 	q->matrix_mdev = NULL;
+	if (status.response_code) {
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
+			 "IRQ disable failed for queue %02x.%04x: status response code=%u\n",
+			 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+			 status.response_code);
+	} else {
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
+				 "IRQ disabled for queue %02x.%04x\n",
+				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
+	}
 	return status;
 }
 
@@ -215,6 +221,10 @@  static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 	case 1:
 		break;
 	default:
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_ERR,
+				 "IRQ enable failed for queue %02x.%04x: vfio_pin_pages rc=%d\n",
+				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+				 ret);
 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
 		return status;
 	}
@@ -235,16 +245,25 @@  static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 		vfio_ap_free_aqic_resources(q);
 		q->saved_pfn = g_pfn;
 		q->saved_isc = isc;
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
+				 "IRQ enabled for queue %02x.%04x",
+				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
 		break;
 	case AP_RESPONSE_OTHERWISE_CHANGED:
 		/* We could not modify IRQ setings: clear new configuration */
 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
 		kvm_s390_gisc_unregister(kvm, isc);
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
+				 "IRQ enable failed for queue %02x.%04x: status response code=%u",
+				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+				 status.response_code);
 		break;
 	default:
-		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
-			status.response_code);
 		vfio_ap_irq_disable(q);
+		VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
+				 "IRQ enable failed for queue %02x.%04x: status response code=%u",
+				 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+				 status.response_code);
 		break;
 	}
 
@@ -321,8 +340,29 @@  static void vfio_ap_matrix_init(struct ap_config_info *info,
 	matrix->adm_max = info->apxa ? info->Nd : 15;
 }
 
+static int vfio_ap_mdev_debug_init(struct ap_matrix_mdev *matrix_mdev)
+{
+	int ret;
+
+	matrix_mdev->dbf = debug_register(dev_name(mdev_dev(matrix_mdev->mdev)),
+					  1, 1,
+					  DBF_SPRINTF_MAX_ARGS * sizeof(long));
+
+	if (!matrix_mdev->dbf)
+		return -ENOMEM;
+
+	ret = debug_register_view(matrix_mdev->dbf, &debug_sprintf_view);
+	if (ret)
+		return ret;
+
+	debug_set_level(matrix_mdev->dbf, dbglvl);
+
+	return 0;
+}
+
 static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
 {
+	int ret;
 	struct ap_matrix_mdev *matrix_mdev;
 
 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
@@ -335,6 +375,13 @@  static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
 	}
 
 	matrix_mdev->mdev = mdev;
+
+	ret = vfio_ap_mdev_debug_init(matrix_mdev);
+	if (ret) {
+		kfree(matrix_mdev);
+		return ret;
+	}
+
 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
 	mdev_set_drvdata(mdev, matrix_mdev);
 	matrix_mdev->pqap_hook.hook = handle_pqap;
@@ -350,14 +397,19 @@  static int vfio_ap_mdev_remove(struct mdev_device *mdev)
 {
 	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
-	if (matrix_mdev->kvm)
+	if (matrix_mdev->kvm) {
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+				 "remove rejected, mdev in use by %s",
+				 matrix_mdev->kvm->debugfs_dentry->d_iname);
 		return -EBUSY;
+	}
 
 	mutex_lock(&matrix_dev->lock);
 	vfio_ap_mdev_reset_queues(mdev);
 	list_del(&matrix_mdev->node);
 	mutex_unlock(&matrix_dev->lock);
 
+	debug_unregister(matrix_mdev->dbf);
 	kfree(matrix_mdev);
 	mdev_set_drvdata(mdev, NULL);
 	atomic_inc(&matrix_dev->available_instances);
@@ -406,6 +458,22 @@  static struct attribute_group *vfio_ap_mdev_type_groups[] = {
 	NULL,
 };
 
+static void vfio_ap_mdev_log_sharing_error(struct ap_matrix_mdev *logdev,
+					   const char *assigned_to,
+					   unsigned long *apm,
+					   unsigned long *aqm)
+{
+	unsigned long apid, apqi;
+
+	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+		for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
+					 "queue %02lx.%04lx already assigned to %s\n",
+					 apid, apqi, assigned_to);
+		}
+	}
+}
+
 /**
  * vfio_ap_mdev_verify_no_sharing
  *
@@ -448,22 +516,39 @@  static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
 		if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
 			continue;
 
+		vfio_ap_mdev_log_sharing_error(matrix_mdev,
+					       dev_name(mdev_dev(lstdev->mdev)),
+					       apm, aqm);
+
 		return -EADDRINUSE;
 	}
 
 	return 0;
 }
 
-static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev,
+static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *logdev,
 				       unsigned long *apm, unsigned long *aqm)
 {
-	int ret;
+	int ret = 0;
+	unsigned long apid, apqi;
+
+	for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+		for_each_set_bit_inv(apqi, aqm, AP_DEVICES) {
+			if (!ap_owned_by_def_drv(apid, apqi))
+				continue;
+
+			VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
+					 "queue %02lx.%04lx owned by zcrypt\n",
+					 apid, apqi);
+
+			ret = -EADDRNOTAVAIL;
+		}
+	}
 
-	ret = ap_apqn_in_matrix_owned_by_def_drv(apm, aqm);
 	if (ret)
-		return (ret == 1) ? -EADDRNOTAVAIL : ret;
+		return ret;
 
-	return vfio_ap_mdev_verify_no_sharing(matrix_mdev, apm, aqm);
+	return vfio_ap_mdev_verify_no_sharing(logdev, apm, aqm);
 }
 
 static void vfio_ap_mdev_update_crycb(struct ap_matrix_mdev *matrix_mdev)
@@ -536,13 +621,20 @@  static ssize_t assign_adapter_store(struct device *dev,
 					  matrix_mdev->matrix.aqm);
 	if (ret) {
 		mutex_unlock(&matrix_dev->lock);
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+				 "failed to assign adapter %lu(%#02lx)\n",
+				 apid, apid);
 		return ret;
 	}
 
+
 	set_bit_inv(apid, matrix_mdev->matrix.apm);
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "assigned adapter %lu(%#02lx)\n", apid, apid);
+
 	return count;
 }
 static DEVICE_ATTR_WO(assign_adapter);
@@ -587,6 +679,9 @@  static ssize_t unassign_adapter_store(struct device *dev,
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "unassigned adapter %lu(%#02lx)\n", apid, apid);
+
 	return count;
 }
 static DEVICE_ATTR_WO(unassign_adapter);
@@ -651,6 +746,9 @@  static ssize_t assign_domain_store(struct device *dev,
 					  aqm);
 	if (ret) {
 		mutex_unlock(&matrix_dev->lock);
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+				 "failed to assign domain %lu(%#04lx)\n",
+				 apqi, apqi);
 		return ret;
 	}
 
@@ -658,6 +756,9 @@  static ssize_t assign_domain_store(struct device *dev,
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "assigned domain %lu(%#04lx)\n", apqi, apqi);
+
 	return count;
 }
 static DEVICE_ATTR_WO(assign_domain);
@@ -703,6 +804,9 @@  static ssize_t unassign_domain_store(struct device *dev,
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "unassigned domain %lu(%#02lx)\n", apqi, apqi);
+
 	return count;
 }
 static DEVICE_ATTR_WO(unassign_domain);
@@ -746,6 +850,9 @@  static ssize_t assign_control_domain_store(struct device *dev,
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "assigned control domain %lu(%#02lx)\n", id, id);
+
 	return count;
 }
 static DEVICE_ATTR_WO(assign_control_domain);
@@ -789,6 +896,10 @@  static ssize_t unassign_control_domain_store(struct device *dev,
 	vfio_ap_mdev_update_crycb(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+			 "unassigned control domain %lu(%#02lx)\n",
+			 domid, domid);
+
 	return count;
 }
 static DEVICE_ATTR_WO(unassign_control_domain);
@@ -910,6 +1021,9 @@  static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
 	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
 		if ((m != matrix_mdev) && (m->kvm == kvm)) {
 			mutex_unlock(&matrix_dev->lock);
+			VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+					 "KVM already set for mdev %s\n",
+					 dev_name(mdev_dev(m->mdev)));
 			return -EPERM;
 		}
 	}
@@ -919,6 +1033,8 @@  static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
 	kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
 	mutex_unlock(&matrix_dev->lock);
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO, "KVM set for mdev\n");
+
 	return 0;
 }
 
@@ -972,8 +1088,11 @@  static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
 		return NOTIFY_DONE;
 
 	/* If there is no CRYCB pointer, then we can't copy the masks */
-	if (!matrix_mdev->kvm->arch.crypto.crycbd)
+	if (!matrix_mdev->kvm->arch.crypto.crycbd) {
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+				 "Failed to set CRYCB masks: missing CRYCBD\n");
 		return NOTIFY_DONE;
+	}
 
 	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
 				  matrix_mdev->matrix.aqm,
@@ -1013,9 +1132,10 @@  static void vfio_ap_mdev_wait_for_qempty(ap_qid_t qid)
 			msleep(20);
 			break;
 		default:
-			pr_warn("%s: tapq response %02x waiting for queue %04x.%02x empty\n",
-				__func__, status.response_code,
-				AP_QID_CARD(qid), AP_QID_QUEUE(qid));
+			WARN_ONCE(1,
+				  "%s: tapq response %02x waiting for queue %04x.%02x empty\n",
+				  __func__, status.response_code,
+				  AP_QID_CARD(qid), AP_QID_QUEUE(qid));
 			return;
 		}
 	} while (--retry);
@@ -1062,8 +1182,16 @@  static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
 				     matrix_mdev->matrix.aqm_max + 1) {
 			ret = vfio_ap_mdev_reset_queue(apid, apqi);
-			if (ret)
+			if (ret) {
 				rc = ret;
+				VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+						 "queue %02lx.%04lx reset failed: rc=%d\n",
+						 apid, apqi, ret);
+			} else {
+				VFIO_AP_MDEV_DBF(matrix_mdev, DBF_INFO,
+						 "queue %02lx.%04lx reset\n",
+						 apid, apqi);
+			}
 
 			vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi));
 		}
@@ -1089,6 +1217,9 @@  static int vfio_ap_mdev_open(struct mdev_device *mdev)
 				     &events, &matrix_mdev->group_notifier);
 	if (ret) {
 		module_put(THIS_MODULE);
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+				 "failed to open mdev fd: VFIO_GROUP_NOTIFY notifier registration failed with rc=%d\n",
+				 ret);
 		return ret;
 	}
 
@@ -1096,12 +1227,19 @@  static int vfio_ap_mdev_open(struct mdev_device *mdev)
 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 				     &events, &matrix_mdev->iommu_notifier);
-	if (!ret)
+	if (!ret) {
+		VFIO_AP_MDEV_DBF(matrix_mdev, DBF_DEBUG,
+				 "opened mdev fd: guest started\n");
 		return ret;
+	}
 
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+			 "failed to open mdev fd: VFIO_IOMMU_NOTIFY notifier registration failed with rc=%d\n",
+			 ret);
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
 	module_put(THIS_MODULE);
+
 	return ret;
 }
 
@@ -1124,6 +1262,9 @@  static void vfio_ap_mdev_release(struct mdev_device *mdev)
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
 	module_put(THIS_MODULE);
+
+	VFIO_AP_MDEV_DBF(matrix_mdev, DBF_DEBUG,
+			 "released mdev fd: guest terminated\n");
 }
 
 static int vfio_ap_mdev_get_device_info(unsigned long arg)
@@ -1202,6 +1343,11 @@  int vfio_ap_mdev_probe_queue(struct ap_queue *queue)
 	q->apqn = queue->qid;
 	q->saved_isc = VFIO_AP_ISC_INVALID;
 
+	VFIO_AP_DBF(matrix_dev, DBF_DEBUG,
+		    "queue %02x.%04x bound to %s\n",
+		    AP_QID_CARD(queue->qid), AP_QID_QUEUE(queue->qid),
+		    VFIO_AP_DRV_NAME);
+
 	return 0;
 }
 
@@ -1217,4 +1363,9 @@  void vfio_ap_mdev_remove_queue(struct ap_queue *queue)
 	vfio_ap_mdev_reset_queue(apid, apqi);
 	vfio_ap_irq_disable(q);
 	kfree(q);
+
+	VFIO_AP_DBF(matrix_dev, DBF_DEBUG,
+		    "queue %02x.%04x unbound from %s\n",
+		    AP_QID_CARD(queue->qid), AP_QID_QUEUE(queue->qid),
+		    VFIO_AP_DRV_NAME);
 }
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 5cc3c2ebf151..f717e43e10cf 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -24,6 +24,21 @@ 
 #define VFIO_AP_MODULE_NAME "vfio_ap"
 #define VFIO_AP_DRV_NAME "vfio_ap"
 
+#define DBF_ERR		3	/* error conditions   */
+#define DBF_WARN	4	/* warning conditions */
+#define DBF_INFO	5	/* informational      */
+#define DBF_DEBUG	6	/* for debugging only */
+
+#define DBF_SPRINTF_MAX_ARGS 5
+
+#define VFIO_AP_DBF(d_matrix_dev, ...) \
+	debug_sprintf_event(d_matrix_dev->dbf, ##__VA_ARGS__)
+
+#define VFIO_AP_MDEV_DBF(d_matrix_mdev, ...) \
+	debug_sprintf_event(d_matrix_mdev->dbf, ##__VA_ARGS__)
+
+extern uint dbglvl;
+
 /**
  * ap_matrix_dev - the AP matrix device structure
  * @device:	generic device structure associated with the AP matrix device
@@ -43,6 +58,7 @@  struct ap_matrix_dev {
 	struct list_head mdev_list;
 	struct mutex lock;
 	struct ap_driver  *vfio_ap_drv;
+	debug_info_t *dbf;
 };
 
 extern struct ap_matrix_dev *matrix_dev;
@@ -77,6 +93,9 @@  struct ap_matrix {
  * @group_notifier: notifier block used for specifying callback function for
  *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
  * @kvm:	the struct holding guest's state
+ * @pqap_hook:	handler for PQAP instruction
+ * @mdev:	the matrix mediated device
+ * @dbf:	the debug info log
  */
 struct ap_matrix_mdev {
 	struct list_head node;
@@ -86,6 +105,7 @@  struct ap_matrix_mdev {
 	struct kvm *kvm;
 	struct kvm_s390_module_hook pqap_hook;
 	struct mdev_device *mdev;
+	debug_info_t *dbf;
 };
 
 extern int vfio_ap_mdev_register(void);