diff mbox series

drivers/xen: Improve the late XenStore init protocol

Message ID 20240515014330.1044617-1-xin.wang2@amd.com (mailing list archive)
State Superseded
Headers show
Series drivers/xen: Improve the late XenStore init protocol | expand

Commit Message

Henry Wang May 15, 2024, 1:43 a.m. UTC
Currently, the late XenStore init protocol is only triggered properly
for the case that HVM_PARAM_STORE_PFN is ~0ULL (invalid). For the
case that XenStore interface is allocated but not ready (the connection
status is not XENSTORE_CONNECTED), Linux should also wait until the
XenStore is set up properly.

Introduce a macro to describe the XenStore interface is ready, use
it in xenbus_probe_initcall() and xenbus_probe() to select the code
path of doing the late XenStore init protocol or not.

Take the opportunity to enhance the check of the allocated XenStore
interface can be properly mapped, and return error early if the
memremap() fails.

Signed-off-by: Henry Wang <xin.wang2@amd.com>
Signed-off-by: Michal Orzel <michal.orzel@amd.com>
---
 drivers/xen/xenbus/xenbus_probe.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

Comments

Stefano Stabellini May 15, 2024, 10:30 p.m. UTC | #1
On Wed, 15 May 2024, Henry Wang wrote:
> Currently, the late XenStore init protocol is only triggered properly
> for the case that HVM_PARAM_STORE_PFN is ~0ULL (invalid). For the
> case that XenStore interface is allocated but not ready (the connection
> status is not XENSTORE_CONNECTED), Linux should also wait until the
> XenStore is set up properly.
> 
> Introduce a macro to describe the XenStore interface is ready, use
> it in xenbus_probe_initcall() and xenbus_probe() to select the code
> path of doing the late XenStore init protocol or not.
> 
> Take the opportunity to enhance the check of the allocated XenStore
> interface can be properly mapped, and return error early if the
> memremap() fails.
> 
> Signed-off-by: Henry Wang <xin.wang2@amd.com>
> Signed-off-by: Michal Orzel <michal.orzel@amd.com>

Please add a Fixes: tag


> ---
>  drivers/xen/xenbus/xenbus_probe.c | 21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
> index 3205e5d724c8..8aec0ed1d047 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -72,6 +72,10 @@ EXPORT_SYMBOL_GPL(xen_store_evtchn);
>  struct xenstore_domain_interface *xen_store_interface;
>  EXPORT_SYMBOL_GPL(xen_store_interface);
>  
> +#define XS_INTERFACE_READY \
> +	((xen_store_interface != NULL) && \
> +	 (xen_store_interface->connection == XENSTORE_CONNECTED))
> +
>  enum xenstore_init xen_store_domain_type;
>  EXPORT_SYMBOL_GPL(xen_store_domain_type);
>  
> @@ -751,9 +755,10 @@ static void xenbus_probe(void)
>  {
>  	xenstored_ready = 1;
>  
> -	if (!xen_store_interface) {
> -		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
> -					       XEN_PAGE_SIZE, MEMREMAP_WB);
> +	if (!xen_store_interface || XS_INTERFACE_READY) {
> +		if (!xen_store_interface)

These two nested if's don't make sense to me. If XS_INTERFACE_READY
succeeds, it means that  ((xen_store_interface != NULL) &&
(xen_store_interface->connection == XENSTORE_CONNECTED)).

So it is not possible that xen_store_interface == NULL immediately
after. Right?


> +			xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
> +						       XEN_PAGE_SIZE, MEMREMAP_WB);
>  		/*
>  		 * Now it is safe to free the IRQ used for xenstore late
>  		 * initialization. No need to unbind: it is about to be
> @@ -822,7 +827,7 @@ static int __init xenbus_probe_initcall(void)
>  	if (xen_store_domain_type == XS_PV ||
>  	    (xen_store_domain_type == XS_HVM &&
>  	     !xs_hvm_defer_init_for_callback() &&
> -	     xen_store_interface != NULL))
> +	     XS_INTERFACE_READY))
>  		xenbus_probe();
>  
>  	/*
> @@ -831,7 +836,7 @@ static int __init xenbus_probe_initcall(void)
>  	 * started, then probe.  It will be triggered when communication
>  	 * starts happening, by waiting on xb_waitq.
>  	 */
> -	if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) {
> +	if (xen_store_domain_type == XS_LOCAL || !XS_INTERFACE_READY) {
>  		struct task_struct *probe_task;
>  
>  		probe_task = kthread_run(xenbus_probe_thread, NULL,
> @@ -1014,6 +1019,12 @@ static int __init xenbus_init(void)
>  			xen_store_interface =
>  				memremap(xen_store_gfn << XEN_PAGE_SHIFT,
>  					 XEN_PAGE_SIZE, MEMREMAP_WB);
> +			if (!xen_store_interface) {
> +				pr_err("%s: cannot map HVM_PARAM_STORE_PFN=%llx\n",
> +				       __func__, v);
> +				err = -ENOMEM;

I think this should -EINVAL


> +				goto out_error;
> +			}
>  			if (xen_store_interface->connection != XENSTORE_CONNECTED)
>  				wait = true;
>  		}
> -- 
> 2.34.1
>
Henry Wang May 16, 2024, 1:25 a.m. UTC | #2
Hi Stefano,

On 5/16/2024 6:30 AM, Stefano Stabellini wrote:
> On Wed, 15 May 2024, Henry Wang wrote:
>> Currently, the late XenStore init protocol is only triggered properly
>> for the case that HVM_PARAM_STORE_PFN is ~0ULL (invalid). For the
>> case that XenStore interface is allocated but not ready (the connection
>> status is not XENSTORE_CONNECTED), Linux should also wait until the
>> XenStore is set up properly.
>>
>> Introduce a macro to describe the XenStore interface is ready, use
>> it in xenbus_probe_initcall() and xenbus_probe() to select the code
>> path of doing the late XenStore init protocol or not.
>>
>> Take the opportunity to enhance the check of the allocated XenStore
>> interface can be properly mapped, and return error early if the
>> memremap() fails.
>>
>> Signed-off-by: Henry Wang <xin.wang2@amd.com>
>> Signed-off-by: Michal Orzel <michal.orzel@amd.com>
> Please add a Fixes: tag

Sure. Will do.

>> ---
>>   drivers/xen/xenbus/xenbus_probe.c | 21 ++++++++++++++++-----
>>   1 file changed, 16 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
>> index 3205e5d724c8..8aec0ed1d047 100644
>> --- a/drivers/xen/xenbus/xenbus_probe.c
>> +++ b/drivers/xen/xenbus/xenbus_probe.c
>> @@ -72,6 +72,10 @@ EXPORT_SYMBOL_GPL(xen_store_evtchn);
>>   struct xenstore_domain_interface *xen_store_interface;
>>   EXPORT_SYMBOL_GPL(xen_store_interface);
>>   
>> +#define XS_INTERFACE_READY \
>> +	((xen_store_interface != NULL) && \
>> +	 (xen_store_interface->connection == XENSTORE_CONNECTED))
>> +
>>   enum xenstore_init xen_store_domain_type;
>>   EXPORT_SYMBOL_GPL(xen_store_domain_type);
>>   
>> @@ -751,9 +755,10 @@ static void xenbus_probe(void)
>>   {
>>   	xenstored_ready = 1;
>>   
>> -	if (!xen_store_interface) {
>> -		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
>> -					       XEN_PAGE_SIZE, MEMREMAP_WB);
>> +	if (!xen_store_interface || XS_INTERFACE_READY) {
>> +		if (!xen_store_interface)
> These two nested if's don't make sense to me. If XS_INTERFACE_READY
> succeeds, it means that  ((xen_store_interface != NULL) &&
> (xen_store_interface->connection == XENSTORE_CONNECTED)).
>
> So it is not possible that xen_store_interface == NULL immediately
> after. Right?

I think this is because we want to free the irq for the late init case, 
otherwise the init-dom0less will fail. For the xenstore PFN allocated 
case, the connection is already set to CONNECTED when we execute 
init-dom0less. But I agree with you, would below diff makes more sense 
to you?

diff --git a/drivers/xen/xenbus/xenbus_probe.c 
b/drivers/xen/xenbus/xenbus_probe.c
index 8aec0ed1d047..b8005b651a29 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -76,6 +76,8 @@ EXPORT_SYMBOL_GPL(xen_store_interface);
         ((xen_store_interface != NULL) && \
          (xen_store_interface->connection == XENSTORE_CONNECTED))

+static bool xs_late_init = false;
+
  enum xenstore_init xen_store_domain_type;
  EXPORT_SYMBOL_GPL(xen_store_domain_type);

@@ -755,7 +757,7 @@ static void xenbus_probe(void)
  {
         xenstored_ready = 1;

-       if (!xen_store_interface || XS_INTERFACE_READY) {
+       if (xs_late_init) {
                 if (!xen_store_interface)
                         xen_store_interface = memremap(xen_store_gfn << 
XEN_PAGE_SHIFT,
XEN_PAGE_SIZE, MEMREMAP_WB);
@@ -937,6 +939,8 @@ static irqreturn_t xenbus_late_init(int irq, void 
*unused)
         int err;
         uint64_t v = 0;

+       xs_late_init = true;
+
         err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
         if (err || !v || !~v)
                 return IRQ_HANDLED;

>> +			xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
>> +						       XEN_PAGE_SIZE, MEMREMAP_WB);
>>   		/*
>>   		 * Now it is safe to free the IRQ used for xenstore late
>>   		 * initialization. No need to unbind: it is about to be
>> @@ -822,7 +827,7 @@ static int __init xenbus_probe_initcall(void)
>>   	if (xen_store_domain_type == XS_PV ||
>>   	    (xen_store_domain_type == XS_HVM &&
>>   	     !xs_hvm_defer_init_for_callback() &&
>> -	     xen_store_interface != NULL))
>> +	     XS_INTERFACE_READY))
>>   		xenbus_probe();
>>   
>>   	/*
>> @@ -831,7 +836,7 @@ static int __init xenbus_probe_initcall(void)
>>   	 * started, then probe.  It will be triggered when communication
>>   	 * starts happening, by waiting on xb_waitq.
>>   	 */
>> -	if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) {
>> +	if (xen_store_domain_type == XS_LOCAL || !XS_INTERFACE_READY) {
>>   		struct task_struct *probe_task;
>>   
>>   		probe_task = kthread_run(xenbus_probe_thread, NULL,
>> @@ -1014,6 +1019,12 @@ static int __init xenbus_init(void)
>>   			xen_store_interface =
>>   				memremap(xen_store_gfn << XEN_PAGE_SHIFT,
>>   					 XEN_PAGE_SIZE, MEMREMAP_WB);
>> +			if (!xen_store_interface) {
>> +				pr_err("%s: cannot map HVM_PARAM_STORE_PFN=%llx\n",
>> +				       __func__, v);
>> +				err = -ENOMEM;
> I think this should -EINVAL

Will change.

Kind regards,
Henry

>> +				goto out_error;
>> +			}
>>   			if (xen_store_interface->connection != XENSTORE_CONNECTED)
>>   				wait = true;
>>   		}
>> -- 
>> 2.34.1
>>
Stefano Stabellini May 17, 2024, 12:52 a.m. UTC | #3
On Thu, 16 May 2024, Henry Wang wrote:
> Hi Stefano,
> 
> On 5/16/2024 6:30 AM, Stefano Stabellini wrote:
> > On Wed, 15 May 2024, Henry Wang wrote:
> > > Currently, the late XenStore init protocol is only triggered properly
> > > for the case that HVM_PARAM_STORE_PFN is ~0ULL (invalid). For the
> > > case that XenStore interface is allocated but not ready (the connection
> > > status is not XENSTORE_CONNECTED), Linux should also wait until the
> > > XenStore is set up properly.
> > > 
> > > Introduce a macro to describe the XenStore interface is ready, use
> > > it in xenbus_probe_initcall() and xenbus_probe() to select the code
> > > path of doing the late XenStore init protocol or not.
> > > 
> > > Take the opportunity to enhance the check of the allocated XenStore
> > > interface can be properly mapped, and return error early if the
> > > memremap() fails.
> > > 
> > > Signed-off-by: Henry Wang <xin.wang2@amd.com>
> > > Signed-off-by: Michal Orzel <michal.orzel@amd.com>
> > Please add a Fixes: tag
> 
> Sure. Will do.
> 
> > > ---
> > >   drivers/xen/xenbus/xenbus_probe.c | 21 ++++++++++++++++-----
> > >   1 file changed, 16 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/xen/xenbus/xenbus_probe.c
> > > b/drivers/xen/xenbus/xenbus_probe.c
> > > index 3205e5d724c8..8aec0ed1d047 100644
> > > --- a/drivers/xen/xenbus/xenbus_probe.c
> > > +++ b/drivers/xen/xenbus/xenbus_probe.c
> > > @@ -72,6 +72,10 @@ EXPORT_SYMBOL_GPL(xen_store_evtchn);
> > >   struct xenstore_domain_interface *xen_store_interface;
> > >   EXPORT_SYMBOL_GPL(xen_store_interface);
> > >   +#define XS_INTERFACE_READY \
> > > +	((xen_store_interface != NULL) && \
> > > +	 (xen_store_interface->connection == XENSTORE_CONNECTED))
> > > +
> > >   enum xenstore_init xen_store_domain_type;
> > >   EXPORT_SYMBOL_GPL(xen_store_domain_type);
> > >   @@ -751,9 +755,10 @@ static void xenbus_probe(void)
> > >   {
> > >   	xenstored_ready = 1;
> > >   -	if (!xen_store_interface) {
> > > -		xen_store_interface = memremap(xen_store_gfn <<
> > > XEN_PAGE_SHIFT,
> > > -					       XEN_PAGE_SIZE, MEMREMAP_WB);
> > > +	if (!xen_store_interface || XS_INTERFACE_READY) {
> > > +		if (!xen_store_interface)
> > These two nested if's don't make sense to me. If XS_INTERFACE_READY
> > succeeds, it means that  ((xen_store_interface != NULL) &&
> > (xen_store_interface->connection == XENSTORE_CONNECTED)).
> > 
> > So it is not possible that xen_store_interface == NULL immediately
> > after. Right?
> 
> I think this is because we want to free the irq for the late init case,
> otherwise the init-dom0less will fail. For the xenstore PFN allocated case,
> the connection is already set to CONNECTED when we execute init-dom0less. But
> I agree with you, would below diff makes more sense to you?
> 
> diff --git a/drivers/xen/xenbus/xenbus_probe.c
> b/drivers/xen/xenbus/xenbus_probe.c
> index 8aec0ed1d047..b8005b651a29 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -76,6 +76,8 @@ EXPORT_SYMBOL_GPL(xen_store_interface);
>         ((xen_store_interface != NULL) && \
>          (xen_store_interface->connection == XENSTORE_CONNECTED))
> 
> +static bool xs_late_init = false;
> +
>  enum xenstore_init xen_store_domain_type;
>  EXPORT_SYMBOL_GPL(xen_store_domain_type);
> 
> @@ -755,7 +757,7 @@ static void xenbus_probe(void)
>  {
>         xenstored_ready = 1;
> 
> -       if (!xen_store_interface || XS_INTERFACE_READY) {
> +       if (xs_late_init) {
>                 if (!xen_store_interface)
>                         xen_store_interface = memremap(xen_store_gfn <<


I would just remove the outer 'if' and do this:


	if (!xen_store_interface)
		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
				XEN_PAGE_SIZE, MEMREMAP_WB);
	/*
	 * Now it is safe to free the IRQ used for xenstore late
	 * initialization. No need to unbind: it is about to be
	 * bound again from xb_init_comms. Note that calling
	 * unbind_from_irqhandler now would result in xen_evtchn_close()
	 * being called and the event channel not being enabled again
	 * afterwards, resulting in missed event notifications.
	 */
	if (xs_init_irq > 0)
		free_irq(xs_init_irq, &xb_waitq);


I think this should work fine in all cases. I am unsure if
xs_init_irq==0 is possible valid value for xs_init_irq. If it is not,
then we are fine. If 0 is a possible valid irq number, then we should
initialize xs_init_irq to -1, and here check for xs_init_irq >= 0.
Henry Wang May 17, 2024, 1:17 a.m. UTC | #4
Hi Stefano,

On 5/17/2024 8:52 AM, Stefano Stabellini wrote:
> On Thu, 16 May 2024, Henry Wang wrote:
>>>>    enum xenstore_init xen_store_domain_type;
>>>>    EXPORT_SYMBOL_GPL(xen_store_domain_type);
>>>>    @@ -751,9 +755,10 @@ static void xenbus_probe(void)
>>>>    {
>>>>    	xenstored_ready = 1;
>>>>    -	if (!xen_store_interface) {
>>>> -		xen_store_interface = memremap(xen_store_gfn <<
>>>> XEN_PAGE_SHIFT,
>>>> -					       XEN_PAGE_SIZE, MEMREMAP_WB);
>>>> +	if (!xen_store_interface || XS_INTERFACE_READY) {
>>>> +		if (!xen_store_interface)
>>> These two nested if's don't make sense to me. If XS_INTERFACE_READY
>>> succeeds, it means that  ((xen_store_interface != NULL) &&
>>> (xen_store_interface->connection == XENSTORE_CONNECTED)).
>>>
>>> So it is not possible that xen_store_interface == NULL immediately
>>> after. Right?
>> I think this is because we want to free the irq for the late init case,
>> otherwise the init-dom0less will fail. For the xenstore PFN allocated case,
>> the connection is already set to CONNECTED when we execute init-dom0less. But
>> I agree with you, would below diff makes more sense to you?
>>
>> diff --git a/drivers/xen/xenbus/xenbus_probe.c
>> b/drivers/xen/xenbus/xenbus_probe.c
>> index 8aec0ed1d047..b8005b651a29 100644
>> --- a/drivers/xen/xenbus/xenbus_probe.c
>> +++ b/drivers/xen/xenbus/xenbus_probe.c
>> @@ -76,6 +76,8 @@ EXPORT_SYMBOL_GPL(xen_store_interface);
>>          ((xen_store_interface != NULL) && \
>>           (xen_store_interface->connection == XENSTORE_CONNECTED))
>>
>> +static bool xs_late_init = false;
>> +
>>   enum xenstore_init xen_store_domain_type;
>>   EXPORT_SYMBOL_GPL(xen_store_domain_type);
>>
>> @@ -755,7 +757,7 @@ static void xenbus_probe(void)
>>   {
>>          xenstored_ready = 1;
>>
>> -       if (!xen_store_interface || XS_INTERFACE_READY) {
>> +       if (xs_late_init) {
>>                  if (!xen_store_interface)
>>                          xen_store_interface = memremap(xen_store_gfn <<
>
> I would just remove the outer 'if' and do this:
>
>
> 	if (!xen_store_interface)
> 		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
> 				XEN_PAGE_SIZE, MEMREMAP_WB);
> 	/*
> 	 * Now it is safe to free the IRQ used for xenstore late
> 	 * initialization. No need to unbind: it is about to be
> 	 * bound again from xb_init_comms. Note that calling
> 	 * unbind_from_irqhandler now would result in xen_evtchn_close()
> 	 * being called and the event channel not being enabled again
> 	 * afterwards, resulting in missed event notifications.
> 	 */
> 	if (xs_init_irq > 0)
> 		free_irq(xs_init_irq, &xb_waitq);
>
>
> I think this should work fine in all cases.

Thanks. I followed your suggestion in v2.

>   I am unsure if
> xs_init_irq==0 is possible valid value for xs_init_irq. If it is not,
> then we are fine. If 0 is a possible valid irq number, then we should
> initialize xs_init_irq to -1, and here check for xs_init_irq >= 0.

Yeah the xs_init_irq==0 is a valid value. I followed your latter comment 
to init it to -1 and check it >=0.

Kind regards,
Henry
diff mbox series

Patch

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3205e5d724c8..8aec0ed1d047 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -72,6 +72,10 @@  EXPORT_SYMBOL_GPL(xen_store_evtchn);
 struct xenstore_domain_interface *xen_store_interface;
 EXPORT_SYMBOL_GPL(xen_store_interface);
 
+#define XS_INTERFACE_READY \
+	((xen_store_interface != NULL) && \
+	 (xen_store_interface->connection == XENSTORE_CONNECTED))
+
 enum xenstore_init xen_store_domain_type;
 EXPORT_SYMBOL_GPL(xen_store_domain_type);
 
@@ -751,9 +755,10 @@  static void xenbus_probe(void)
 {
 	xenstored_ready = 1;
 
-	if (!xen_store_interface) {
-		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
-					       XEN_PAGE_SIZE, MEMREMAP_WB);
+	if (!xen_store_interface || XS_INTERFACE_READY) {
+		if (!xen_store_interface)
+			xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
+						       XEN_PAGE_SIZE, MEMREMAP_WB);
 		/*
 		 * Now it is safe to free the IRQ used for xenstore late
 		 * initialization. No need to unbind: it is about to be
@@ -822,7 +827,7 @@  static int __init xenbus_probe_initcall(void)
 	if (xen_store_domain_type == XS_PV ||
 	    (xen_store_domain_type == XS_HVM &&
 	     !xs_hvm_defer_init_for_callback() &&
-	     xen_store_interface != NULL))
+	     XS_INTERFACE_READY))
 		xenbus_probe();
 
 	/*
@@ -831,7 +836,7 @@  static int __init xenbus_probe_initcall(void)
 	 * started, then probe.  It will be triggered when communication
 	 * starts happening, by waiting on xb_waitq.
 	 */
-	if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) {
+	if (xen_store_domain_type == XS_LOCAL || !XS_INTERFACE_READY) {
 		struct task_struct *probe_task;
 
 		probe_task = kthread_run(xenbus_probe_thread, NULL,
@@ -1014,6 +1019,12 @@  static int __init xenbus_init(void)
 			xen_store_interface =
 				memremap(xen_store_gfn << XEN_PAGE_SHIFT,
 					 XEN_PAGE_SIZE, MEMREMAP_WB);
+			if (!xen_store_interface) {
+				pr_err("%s: cannot map HVM_PARAM_STORE_PFN=%llx\n",
+				       __func__, v);
+				err = -ENOMEM;
+				goto out_error;
+			}
 			if (xen_store_interface->connection != XENSTORE_CONNECTED)
 				wait = true;
 		}