diff mbox series

[v4,12/16] powerpc/pseries/vas: Setup IRQ and fault handling

Message ID 5ac32e4d07bd048e3d687354501d36c334f1c8e0.camel@linux.ibm.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series Enable VAS and NX-GZIP support on powerVM | expand

Commit Message

Haren Myneni May 21, 2021, 9:39 a.m. UTC
NX generates an interrupt when sees a fault on the user space
buffer and the hypervisor forwards that interrupt to OS. Then
the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
to retrieve the fault CRB information.

This patch also adds changes to setup and free IRQ per each
window and also handles the fault by updating the CSB.

Signed-off-by: Haren Myneni <haren@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/vas.c | 111 +++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

Comments

Nicholas Piggin June 3, 2021, 5:48 a.m. UTC | #1
Excerpts from Haren Myneni's message of May 21, 2021 7:39 pm:
> 
> NX generates an interrupt when sees a fault on the user space
> buffer and the hypervisor forwards that interrupt to OS. Then
> the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
> to retrieve the fault CRB information.
> 
> This patch also adds changes to setup and free IRQ per each
> window and also handles the fault by updating the CSB.
> 
> Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/vas.c | 111 +++++++++++++++++++++++++++
>  1 file changed, 111 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
> index ef0c455f6e93..31dc17573f50 100644
> --- a/arch/powerpc/platforms/pseries/vas.c
> +++ b/arch/powerpc/platforms/pseries/vas.c
> @@ -224,6 +224,62 @@ int plpar_vas_query_capabilities(const u64 hcall, u8 query_type,
>  }
>  EXPORT_SYMBOL_GPL(plpar_vas_query_capabilities);
>  
> +/*
> + * HCALL to get fault CRB from pHyp.
> + */
> +static int plpar_get_nx_fault(u32 winid, u64 buffer)
> +{
> +	int64_t rc;
> +
> +	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
> +
> +	switch (rc) {
> +	case H_SUCCESS:
> +		return 0;
> +	case H_PARAMETER:
> +		pr_err("HCALL(%x): Invalid window ID %u\n", H_GET_NX_FAULT,
> +		       winid);
> +		return -EINVAL;
> +	case H_STATE:
> +		pr_err("HCALL(%x): No outstanding faults for window ID %u\n",
> +		       H_GET_NX_FAULT, winid);
> +		return -EINVAL;
> +	case H_PRIVILEGE:
> +		pr_err("HCALL(%x): Window(%u): Invalid fault buffer 0x%llx\n",
> +		       H_GET_NX_FAULT, winid, buffer);
> +		return -EACCES;
> +	default:
> +		pr_err("HCALL(%x): Unexpected error %lld for window(%u)\n",
> +		       H_GET_NX_FAULT, rc, winid);
> +		return -EIO;
> +	}
> +}

Out of curiosity, you get one of these errors and it just drops the
interrupt on the floor. Then what happens, I assume everything
stops. Should it put some error in the csb, or signal the process or
something? Or is there nothing very sane that can be done?

> +
> +/*
> + * Handle the fault interrupt.
> + * When the fault interrupt is received for each window, query pHyp to get
> + * the fault CRB on the specific fault. Then process the CRB by updating
> + * CSB or send signal if the user space CSB is invalid.
> + * Note: pHyp forwards an interrupt for each fault request. So one fault
> + *	CRB to process for each H_GET_NX_FAULT HCALL.
> + */
> +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
> +{
> +	struct vas_window *txwin = data;
> +	struct coprocessor_request_block crb;
> +	struct vas_user_win_ref *tsk_ref;
> +	int rc;
> +
> +	rc = plpar_get_nx_fault(txwin->winid, (u64)virt_to_phys(&crb));
> +	if (!rc) {
> +		tsk_ref = &txwin->task_ref;
> +		vas_dump_crb(&crb);

This (and existing powernv vas code) is printk()ing a lot of lines per 
fault. This should be pretty normal operation I think? It should avoid
filling the kernel logs, if so. Particularly if it can be triggered by 
userspace.

I know it's existing code, so could be fixed separately from the series.


> +		vas_update_csb(&crb, tsk_ref);

> +	}

> +
> +	return IRQ_HANDLED;
> +}
> +
Haren Myneni June 4, 2021, 1:19 a.m. UTC | #2
On Thu, 2021-06-03 at 15:48 +1000, Nicholas Piggin wrote:
> Excerpts from Haren Myneni's message of May 21, 2021 7:39 pm:
> > NX generates an interrupt when sees a fault on the user space
> > buffer and the hypervisor forwards that interrupt to OS. Then
> > the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
> > to retrieve the fault CRB information.
> > 
> > This patch also adds changes to setup and free IRQ per each
> > window and also handles the fault by updating the CSB.
> > 
> > Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> > ---
> >  arch/powerpc/platforms/pseries/vas.c | 111
> > +++++++++++++++++++++++++++
> >  1 file changed, 111 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/pseries/vas.c
> > b/arch/powerpc/platforms/pseries/vas.c
> > index ef0c455f6e93..31dc17573f50 100644
> > --- a/arch/powerpc/platforms/pseries/vas.c
> > +++ b/arch/powerpc/platforms/pseries/vas.c
> > @@ -224,6 +224,62 @@ int plpar_vas_query_capabilities(const u64
> > hcall, u8 query_type,
> >  }
> >  EXPORT_SYMBOL_GPL(plpar_vas_query_capabilities);
> >  
> > +/*
> > + * HCALL to get fault CRB from pHyp.
> > + */
> > +static int plpar_get_nx_fault(u32 winid, u64 buffer)
> > +{
> > +	int64_t rc;
> > +
> > +	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
> > +
> > +	switch (rc) {
> > +	case H_SUCCESS:
> > +		return 0;
> > +	case H_PARAMETER:
> > +		pr_err("HCALL(%x): Invalid window ID %u\n",
> > H_GET_NX_FAULT,
> > +		       winid);
> > +		return -EINVAL;
> > +	case H_STATE:
> > +		pr_err("HCALL(%x): No outstanding faults for window ID
> > %u\n",
> > +		       H_GET_NX_FAULT, winid);
> > +		return -EINVAL;
> > +	case H_PRIVILEGE:
> > +		pr_err("HCALL(%x): Window(%u): Invalid fault buffer
> > 0x%llx\n",
> > +		       H_GET_NX_FAULT, winid, buffer);
> > +		return -EACCES;
> > +	default:
> > +		pr_err("HCALL(%x): Unexpected error %lld for
> > window(%u)\n",
> > +		       H_GET_NX_FAULT, rc, winid);
> > +		return -EIO;
> > +	}
> > +}
> 
> Out of curiosity, you get one of these errors and it just drops the
> interrupt on the floor. Then what happens, I assume everything
> stops. Should it put some error in the csb, or signal the process or
> something? Or is there nothing very sane that can be done?

The user space polls on CSB with timout interval. If the kernel or NX
does not return, the request will be timeout.

The hypervisor returns the credit after H_GET_NX_FAULT HCALL is
successful. Also one credit is assigned for each window. So in this
case, the error is coming from the hypervisor and the application can
not issue another request on the same window. 

> 
> > +
> > +/*
> > + * Handle the fault interrupt.
> > + * When the fault interrupt is received for each window, query
> > pHyp to get
> > + * the fault CRB on the specific fault. Then process the CRB by
> > updating
> > + * CSB or send signal if the user space CSB is invalid.
> > + * Note: pHyp forwards an interrupt for each fault request. So one
> > fault
> > + *	CRB to process for each H_GET_NX_FAULT HCALL.
> > + */
> > +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
> > +{
> > +	struct vas_window *txwin = data;
> > +	struct coprocessor_request_block crb;
> > +	struct vas_user_win_ref *tsk_ref;
> > +	int rc;
> > +
> > +	rc = plpar_get_nx_fault(txwin->winid, (u64)virt_to_phys(&crb));
> > +	if (!rc) {
> > +		tsk_ref = &txwin->task_ref;
> > +		vas_dump_crb(&crb);
> 
> This (and existing powernv vas code) is printk()ing a lot of lines
> per 
> fault. This should be pretty normal operation I think? It should
> avoid
> filling the kernel logs, if so. Particularly if it can be triggered
> by 
> userspace.
> 
> I know it's existing code, so could be fixed separately from the
> series.

printk messages are only if HCALL returns failure or kernel issue (ex:
not valid window and etc on powerNV). These errors should not be
depending on the iser space requests. So generally we should not get
these errors.  

> 
> 
> > +		vas_update_csb(&crb, tsk_ref);
> > +	}
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
Nicholas Piggin June 5, 2021, 12:43 a.m. UTC | #3
Excerpts from Haren Myneni's message of June 4, 2021 11:19 am:
> On Thu, 2021-06-03 at 15:48 +1000, Nicholas Piggin wrote:
>> Excerpts from Haren Myneni's message of May 21, 2021 7:39 pm:
>> > NX generates an interrupt when sees a fault on the user space
>> > buffer and the hypervisor forwards that interrupt to OS. Then
>> > the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
>> > to retrieve the fault CRB information.
>> > 
>> > This patch also adds changes to setup and free IRQ per each
>> > window and also handles the fault by updating the CSB.
>> > 
>> > Signed-off-by: Haren Myneni <haren@linux.ibm.com>
>> > ---
>> >  arch/powerpc/platforms/pseries/vas.c | 111
>> > +++++++++++++++++++++++++++
>> >  1 file changed, 111 insertions(+)
>> > 
>> > diff --git a/arch/powerpc/platforms/pseries/vas.c
>> > b/arch/powerpc/platforms/pseries/vas.c
>> > index ef0c455f6e93..31dc17573f50 100644
>> > --- a/arch/powerpc/platforms/pseries/vas.c
>> > +++ b/arch/powerpc/platforms/pseries/vas.c
>> > @@ -224,6 +224,62 @@ int plpar_vas_query_capabilities(const u64
>> > hcall, u8 query_type,
>> >  }
>> >  EXPORT_SYMBOL_GPL(plpar_vas_query_capabilities);
>> >  
>> > +/*
>> > + * HCALL to get fault CRB from pHyp.
>> > + */
>> > +static int plpar_get_nx_fault(u32 winid, u64 buffer)
>> > +{
>> > +	int64_t rc;
>> > +
>> > +	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
>> > +
>> > +	switch (rc) {
>> > +	case H_SUCCESS:
>> > +		return 0;
>> > +	case H_PARAMETER:
>> > +		pr_err("HCALL(%x): Invalid window ID %u\n",
>> > H_GET_NX_FAULT,
>> > +		       winid);
>> > +		return -EINVAL;
>> > +	case H_STATE:
>> > +		pr_err("HCALL(%x): No outstanding faults for window ID
>> > %u\n",
>> > +		       H_GET_NX_FAULT, winid);
>> > +		return -EINVAL;
>> > +	case H_PRIVILEGE:
>> > +		pr_err("HCALL(%x): Window(%u): Invalid fault buffer
>> > 0x%llx\n",
>> > +		       H_GET_NX_FAULT, winid, buffer);
>> > +		return -EACCES;
>> > +	default:
>> > +		pr_err("HCALL(%x): Unexpected error %lld for
>> > window(%u)\n",
>> > +		       H_GET_NX_FAULT, rc, winid);
>> > +		return -EIO;
>> > +	}
>> > +}
>> 
>> Out of curiosity, you get one of these errors and it just drops the
>> interrupt on the floor. Then what happens, I assume everything
>> stops. Should it put some error in the csb, or signal the process or
>> something? Or is there nothing very sane that can be done?
> 
> The user space polls on CSB with timout interval. If the kernel or NX
> does not return, the request will be timeout.

Okay, if there is no sane way it can respond to the different error 
cases that's not necessarily unreasonable to just print something in the 
kernel log. Hopefully the kernel log would be useful to the 
administrator / developer / etc, but that's pretty rarely the case for 
Linux errors as it is.

> The hypervisor returns the credit after H_GET_NX_FAULT HCALL is
> successful. Also one credit is assigned for each window. So in this
> case, the error is coming from the hypervisor and the application can
> not issue another request on the same window. 
> 
>> 
>> > +
>> > +/*
>> > + * Handle the fault interrupt.
>> > + * When the fault interrupt is received for each window, query
>> > pHyp to get
>> > + * the fault CRB on the specific fault. Then process the CRB by
>> > updating
>> > + * CSB or send signal if the user space CSB is invalid.
>> > + * Note: pHyp forwards an interrupt for each fault request. So one
>> > fault
>> > + *	CRB to process for each H_GET_NX_FAULT HCALL.
>> > + */
>> > +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
>> > +{
>> > +	struct vas_window *txwin = data;
>> > +	struct coprocessor_request_block crb;
>> > +	struct vas_user_win_ref *tsk_ref;
>> > +	int rc;
>> > +
>> > +	rc = plpar_get_nx_fault(txwin->winid, (u64)virt_to_phys(&crb));
>> > +	if (!rc) {
>> > +		tsk_ref = &txwin->task_ref;
>> > +		vas_dump_crb(&crb);
>> 
>> This (and existing powernv vas code) is printk()ing a lot of lines
>> per 
>> fault. This should be pretty normal operation I think? It should
>> avoid
>> filling the kernel logs, if so. Particularly if it can be triggered
>> by 
>> userspace.
>> 
>> I know it's existing code, so could be fixed separately from the
>> series.
> 
> printk messages are only if HCALL returns failure or kernel issue (ex:
> not valid window and etc on powerNV). These errors should not be
> depending on the iser space requests. So generally we should not get
> these errors.  

Ah I was looking at dump_crb but that's using pr_devel so that's 
probably okay.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index ef0c455f6e93..31dc17573f50 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -224,6 +224,62 @@  int plpar_vas_query_capabilities(const u64 hcall, u8 query_type,
 }
 EXPORT_SYMBOL_GPL(plpar_vas_query_capabilities);
 
+/*
+ * HCALL to get fault CRB from pHyp.
+ */
+static int plpar_get_nx_fault(u32 winid, u64 buffer)
+{
+	int64_t rc;
+
+	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+	switch (rc) {
+	case H_SUCCESS:
+		return 0;
+	case H_PARAMETER:
+		pr_err("HCALL(%x): Invalid window ID %u\n", H_GET_NX_FAULT,
+		       winid);
+		return -EINVAL;
+	case H_STATE:
+		pr_err("HCALL(%x): No outstanding faults for window ID %u\n",
+		       H_GET_NX_FAULT, winid);
+		return -EINVAL;
+	case H_PRIVILEGE:
+		pr_err("HCALL(%x): Window(%u): Invalid fault buffer 0x%llx\n",
+		       H_GET_NX_FAULT, winid, buffer);
+		return -EACCES;
+	default:
+		pr_err("HCALL(%x): Unexpected error %lld for window(%u)\n",
+		       H_GET_NX_FAULT, rc, winid);
+		return -EIO;
+	}
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query pHyp to get
+ * the fault CRB on the specific fault. Then process the CRB by updating
+ * CSB or send signal if the user space CSB is invalid.
+ * Note: pHyp forwards an interrupt for each fault request. So one fault
+ *	CRB to process for each H_GET_NX_FAULT HCALL.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+	struct vas_window *txwin = data;
+	struct coprocessor_request_block crb;
+	struct vas_user_win_ref *tsk_ref;
+	int rc;
+
+	rc = plpar_get_nx_fault(txwin->winid, (u64)virt_to_phys(&crb));
+	if (!rc) {
+		tsk_ref = &txwin->task_ref;
+		vas_dump_crb(&crb);
+		vas_update_csb(&crb, tsk_ref);
+	}
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Allocate window and setup IRQ mapping.
  */
@@ -235,10 +291,51 @@  static int allocate_setup_window(struct vas_window *txwin,
 	rc = plpar_vas_allocate_window(txwin, domain, wintype, DEF_WIN_CREDS);
 	if (rc)
 		return rc;
+	/*
+	 * On powerVM, pHyp setup and forwards the fault interrupt per
+	 * window. So the IRQ setup and fault handling will be done for
+	 * each open window separately.
+	 */
+	txwin->lpar.fault_virq = irq_create_mapping(NULL,
+						    txwin->lpar.fault_irq);
+	if (!txwin->lpar.fault_virq) {
+		pr_err("Failed irq mapping %d\n", txwin->lpar.fault_irq);
+		rc = -EINVAL;
+		goto out_win;
+	}
+
+	txwin->lpar.name = kasprintf(GFP_KERNEL, "vas-win-%d", txwin->winid);
+	if (!txwin->lpar.name) {
+		rc = -ENOMEM;
+		goto out_irq;
+	}
+
+	rc = request_threaded_irq(txwin->lpar.fault_virq, NULL,
+				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+				  txwin->lpar.name, txwin);
+	if (rc) {
+		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+		       txwin->winid, txwin->lpar.fault_virq, rc);
+		goto out_free;
+	}
 
 	txwin->wcreds_max = DEF_WIN_CREDS;
 
 	return 0;
+out_free:
+	kfree(txwin->lpar.name);
+out_irq:
+	irq_dispose_mapping(txwin->lpar.fault_virq);
+out_win:
+	plpar_vas_deallocate_window(txwin->winid);
+	return rc;
+}
+
+static inline void free_irq_setup(struct vas_window *txwin)
+{
+	free_irq(txwin->lpar.fault_virq, txwin);
+	irq_dispose_mapping(txwin->lpar.fault_virq);
+	kfree(txwin->lpar.name);
 }
 
 static struct vas_window *vas_allocate_window(struct vas_tx_win_open_attr *uattr,
@@ -353,6 +450,11 @@  static struct vas_window *vas_allocate_window(struct vas_tx_win_open_attr *uattr
 	return txwin;
 
 out_free:
+	/*
+	 * Window is not operational. Free IRQ before closing
+	 * window so that do not have to hold mutex.
+	 */
+	free_irq_setup(txwin);
 	plpar_vas_deallocate_window(txwin->winid);
 out:
 	atomic_dec(&ct_caps->used_lpar_creds);
@@ -371,7 +473,16 @@  static int deallocate_free_window(struct vas_window *win)
 {
 	int rc = 0;
 
+	/*
+	 * Free IRQ after executing H_DEALLOCATE_VAS_WINDOW HCALL
+	 * to close the window. pHyp waits for all requests including
+	 * faults are processed before closing the window - Means all
+	 * credits are returned. In the case of fault request, credit
+	 * is returned after OS issues H_GET_NX_FAULT HCALL.
+	 */
 	rc = plpar_vas_deallocate_window(win->winid);
+	if (!rc)
+		free_irq_setup(win);
 
 	return rc;
 }