diff mbox series

[1/5] xen/livepatch: register livepatch regions when loaded

Message ID 20231130142944.46322-2-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show
Series xen/livepatch: fixes for the pre-apply / post-revert hooks | expand

Commit Message

Roger Pau Monne Nov. 30, 2023, 2:29 p.m. UTC
Currently livepatch regions are registered as virtual regions only after the
livepatch has been applied.

This can lead to issues when using the pre-apply or post-revert hooks, as at
the point the livepatch is not in the virtual regions list.  If a livepatch
pre-apply hook contains a WARN() it would trigger a hypervisor crash, as the
code to handle the bug frame won't be able to find the instruction pointer that
triggered the #UD in any of the registered virtual regions, and hence crash.

Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
and only remove them once the payload is unloaded.  This requires some changes
to the virtual regions code, as the removal of the virtual regions is no longer
done in stop machine context, and hence an RCU barrier is added in order to
make sure there are no users of the virtual region after it's been removed from
the list.

Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/common/livepatch.c      |  5 +++--
 xen/common/virtual_region.c | 40 +++++++++++--------------------------
 2 files changed, 15 insertions(+), 30 deletions(-)

Comments

Jan Beulich Dec. 5, 2023, 1:47 p.m. UTC | #1
On 30.11.2023 15:29, Roger Pau Monne wrote:
> Currently livepatch regions are registered as virtual regions only after the
> livepatch has been applied.
> 
> This can lead to issues when using the pre-apply or post-revert hooks, as at
> the point the livepatch is not in the virtual regions list.  If a livepatch
> pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the
> code to handle the bug frame won't be able to find the instruction pointer that
> triggered the #UD in any of the registered virtual regions, and hence crash.
> 
> Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
> and only remove them once the payload is unloaded.  This requires some changes
> to the virtual regions code, as the removal of the virtual regions is no longer
> done in stop machine context, and hence an RCU barrier is added in order to
> make sure there are no users of the virtual region after it's been removed from
> the list.
> 
> Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/common/livepatch.c      |  5 +++--
>  xen/common/virtual_region.c | 40 +++++++++++--------------------------
>  2 files changed, 15 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index 1209fea2566c..3199432f11f5 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -942,6 +942,8 @@ static int prepare_payload(struct payload *payload,
>          }
>      }
>  
> +    register_virtual_region(region);
> +
>      return 0;
>  }
>  
> @@ -1071,6 +1073,7 @@ static int build_symbol_table(struct payload *payload,
>  static void free_payload(struct payload *data)
>  {
>      ASSERT(spin_is_locked(&payload_lock));
> +    unregister_virtual_region(&data->region);
>      list_del(&data->list);
>      payload_cnt--;
>      payload_version++;
> @@ -1386,7 +1389,6 @@ static inline void apply_payload_tail(struct payload *data)
>       * The applied_list is iterated by the trap code.
>       */
>      list_add_tail_rcu(&data->applied_list, &applied_list);
> -    register_virtual_region(&data->region);
>  
>      data->state = LIVEPATCH_STATE_APPLIED;
>  }
> @@ -1432,7 +1434,6 @@ static inline void revert_payload_tail(struct payload *data)
>       * The applied_list is iterated by the trap code.
>       */
>      list_del_rcu(&data->applied_list);
> -    unregister_virtual_region(&data->region);
>  
>      data->reverted = true;
>      data->state = LIVEPATCH_STATE_CHECKED;
> diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
> index 5f89703f513b..b444253848cf 100644
> --- a/xen/common/virtual_region.c
> +++ b/xen/common/virtual_region.c
> @@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = {
>  };
>  
>  /*
> - * RCU locking. Additions are done either at startup (when there is only
> - * one CPU) or when all CPUs are running without IRQs.
> - *
> - * Deletions are bit tricky. We do it when Live Patch (all CPUs running
> - * without IRQs) or during bootup (when clearing the init).
> - *
> - * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
> - * on deletion.
> + * RCU locking. Modifications to the list must be done in exclusive mode, and
> + * hence need to hold the spinlock.
>   *
>   * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
>   */
> @@ -58,38 +52,28 @@ const struct virtual_region *find_text_region(unsigned long addr)
>  
>  void register_virtual_region(struct virtual_region *r)
>  {
> -    ASSERT(!local_irq_is_enabled());
> +    unsigned long flags;
>  
> +    spin_lock_irqsave(&virtual_region_lock, flags);
>      list_add_tail_rcu(&r->list, &virtual_region_list);
> +    spin_unlock_irqrestore(&virtual_region_lock, flags);
>  }
>  
>  static void remove_virtual_region(struct virtual_region *r)
>  {
> -    unsigned long flags;
> +     unsigned long flags;

Nit: Stray blank added?

> -    spin_lock_irqsave(&virtual_region_lock, flags);
> -    list_del_rcu(&r->list);
> -    spin_unlock_irqrestore(&virtual_region_lock, flags);
> -    /*
> -     * We do not need to invoke call_rcu.
> -     *
> -     * This is due to the fact that on the deletion we have made sure
> -     * to use spinlocks (to guard against somebody else calling
> -     * unregister_virtual_region) and list_deletion spiced with
> -     * memory barrier.
> -     *
> -     * That protects us from corrupting the list as the readers all
> -     * use list_for_each_entry_rcu which is safe against concurrent
> -     * deletions.
> -     */
> +     spin_lock_irqsave(&virtual_region_lock, flags);
> +     list_del_rcu(&r->list);
> +     spin_unlock_irqrestore(&virtual_region_lock, flags);
>  }
>  
>  void unregister_virtual_region(struct virtual_region *r)
>  {
> -    /* Expected to be called from Live Patch - which has IRQs disabled. */
> -    ASSERT(!local_irq_is_enabled());
> -
>      remove_virtual_region(r);
> +
> +    /* Assert that no CPU might be using the removed region. */
> +    rcu_barrier();
>  }

rcu_barrier() is a relatively heavy operation aiui. Seeing ...

>  #if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)

... this I'd like to ask to consider hiding {,un}register_virtual_region()
in "#ifdef CONFIG_LIVEPATCH" as well, to make clear these aren't supposed
to be used for other purpose. Would at the same time address two Misra
violations, I think (both functions having no callers when !LIVEPATCH).

Jan
Roger Pau Monne Dec. 5, 2023, 3:04 p.m. UTC | #2
On Tue, Dec 05, 2023 at 02:47:56PM +0100, Jan Beulich wrote:
> On 30.11.2023 15:29, Roger Pau Monne wrote:
> > diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
> > index 5f89703f513b..b444253848cf 100644
> > --- a/xen/common/virtual_region.c
> > +++ b/xen/common/virtual_region.c
> > @@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = {
> >  };
> >  
> >  /*
> > - * RCU locking. Additions are done either at startup (when there is only
> > - * one CPU) or when all CPUs are running without IRQs.
> > - *
> > - * Deletions are bit tricky. We do it when Live Patch (all CPUs running
> > - * without IRQs) or during bootup (when clearing the init).
> > - *
> > - * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
> > - * on deletion.
> > + * RCU locking. Modifications to the list must be done in exclusive mode, and
> > + * hence need to hold the spinlock.
> >   *
> >   * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
> >   */
> > @@ -58,38 +52,28 @@ const struct virtual_region *find_text_region(unsigned long addr)
> >  
> >  void register_virtual_region(struct virtual_region *r)
> >  {
> > -    ASSERT(!local_irq_is_enabled());
> > +    unsigned long flags;
> >  
> > +    spin_lock_irqsave(&virtual_region_lock, flags);
> >      list_add_tail_rcu(&r->list, &virtual_region_list);
> > +    spin_unlock_irqrestore(&virtual_region_lock, flags);
> >  }
> >  
> >  static void remove_virtual_region(struct virtual_region *r)
> >  {
> > -    unsigned long flags;
> > +     unsigned long flags;
> 
> Nit: Stray blank added?

Oh, my bad.

> > -    spin_lock_irqsave(&virtual_region_lock, flags);
> > -    list_del_rcu(&r->list);
> > -    spin_unlock_irqrestore(&virtual_region_lock, flags);
> > -    /*
> > -     * We do not need to invoke call_rcu.
> > -     *
> > -     * This is due to the fact that on the deletion we have made sure
> > -     * to use spinlocks (to guard against somebody else calling
> > -     * unregister_virtual_region) and list_deletion spiced with
> > -     * memory barrier.
> > -     *
> > -     * That protects us from corrupting the list as the readers all
> > -     * use list_for_each_entry_rcu which is safe against concurrent
> > -     * deletions.
> > -     */
> > +     spin_lock_irqsave(&virtual_region_lock, flags);
> > +     list_del_rcu(&r->list);
> > +     spin_unlock_irqrestore(&virtual_region_lock, flags);
> >  }
> >  
> >  void unregister_virtual_region(struct virtual_region *r)
> >  {
> > -    /* Expected to be called from Live Patch - which has IRQs disabled. */
> > -    ASSERT(!local_irq_is_enabled());
> > -
> >      remove_virtual_region(r);
> > +
> > +    /* Assert that no CPU might be using the removed region. */
> > +    rcu_barrier();
> >  }
> 
> rcu_barrier() is a relatively heavy operation aiui. Seeing ...
> 
> >  #if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)
> 
> ... this I'd like to ask to consider hiding {,un}register_virtual_region()
> in "#ifdef CONFIG_LIVEPATCH" as well, to make clear these aren't supposed
> to be used for other purpose. Would at the same time address two Misra
> violations, I think (both functions having no callers when !LIVEPATCH).

That's fine, I can do it this same patch unless you prefer such
adjustment to be in a separate change.

Thanks, Roger.
Jan Beulich Dec. 5, 2023, 3:16 p.m. UTC | #3
On 05.12.2023 16:04, Roger Pau Monné wrote:
> On Tue, Dec 05, 2023 at 02:47:56PM +0100, Jan Beulich wrote:
>> On 30.11.2023 15:29, Roger Pau Monne wrote:
>>> diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
>>> index 5f89703f513b..b444253848cf 100644
>>> --- a/xen/common/virtual_region.c
>>> +++ b/xen/common/virtual_region.c
>>> @@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = {
>>>  };
>>>  
>>>  /*
>>> - * RCU locking. Additions are done either at startup (when there is only
>>> - * one CPU) or when all CPUs are running without IRQs.
>>> - *
>>> - * Deletions are bit tricky. We do it when Live Patch (all CPUs running
>>> - * without IRQs) or during bootup (when clearing the init).
>>> - *
>>> - * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
>>> - * on deletion.
>>> + * RCU locking. Modifications to the list must be done in exclusive mode, and
>>> + * hence need to hold the spinlock.
>>>   *
>>>   * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
>>>   */
>>> @@ -58,38 +52,28 @@ const struct virtual_region *find_text_region(unsigned long addr)
>>>  
>>>  void register_virtual_region(struct virtual_region *r)
>>>  {
>>> -    ASSERT(!local_irq_is_enabled());
>>> +    unsigned long flags;
>>>  
>>> +    spin_lock_irqsave(&virtual_region_lock, flags);
>>>      list_add_tail_rcu(&r->list, &virtual_region_list);
>>> +    spin_unlock_irqrestore(&virtual_region_lock, flags);
>>>  }
>>>  
>>>  static void remove_virtual_region(struct virtual_region *r)
>>>  {
>>> -    unsigned long flags;
>>> +     unsigned long flags;
>>
>> Nit: Stray blank added?
> 
> Oh, my bad.
> 
>>> -    spin_lock_irqsave(&virtual_region_lock, flags);
>>> -    list_del_rcu(&r->list);
>>> -    spin_unlock_irqrestore(&virtual_region_lock, flags);
>>> -    /*
>>> -     * We do not need to invoke call_rcu.
>>> -     *
>>> -     * This is due to the fact that on the deletion we have made sure
>>> -     * to use spinlocks (to guard against somebody else calling
>>> -     * unregister_virtual_region) and list_deletion spiced with
>>> -     * memory barrier.
>>> -     *
>>> -     * That protects us from corrupting the list as the readers all
>>> -     * use list_for_each_entry_rcu which is safe against concurrent
>>> -     * deletions.
>>> -     */
>>> +     spin_lock_irqsave(&virtual_region_lock, flags);
>>> +     list_del_rcu(&r->list);
>>> +     spin_unlock_irqrestore(&virtual_region_lock, flags);
>>>  }
>>>  
>>>  void unregister_virtual_region(struct virtual_region *r)
>>>  {
>>> -    /* Expected to be called from Live Patch - which has IRQs disabled. */
>>> -    ASSERT(!local_irq_is_enabled());
>>> -
>>>      remove_virtual_region(r);
>>> +
>>> +    /* Assert that no CPU might be using the removed region. */
>>> +    rcu_barrier();
>>>  }
>>
>> rcu_barrier() is a relatively heavy operation aiui. Seeing ...
>>
>>>  #if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)
>>
>> ... this I'd like to ask to consider hiding {,un}register_virtual_region()
>> in "#ifdef CONFIG_LIVEPATCH" as well, to make clear these aren't supposed
>> to be used for other purpose. Would at the same time address two Misra
>> violations, I think (both functions having no callers when !LIVEPATCH).
> 
> That's fine, I can do it this same patch unless you prefer such
> adjustment to be in a separate change.

Since the change itself constitutes at least part of the reason for the
adjustment, this would be fine with me. (A separate change, if preferred
by others, would still be fine, too.)

Jan
Ross Lagerwall Feb. 23, 2024, 9:45 a.m. UTC | #4
On Thu, Nov 30, 2023 at 2:30 PM Roger Pau Monne <roger.pau@citrix.com> wrote:
>
> Currently livepatch regions are registered as virtual regions only after the
> livepatch has been applied.
>
> This can lead to issues when using the pre-apply or post-revert hooks, as at
> the point the livepatch is not in the virtual regions list.  If a livepatch
> pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the
> code to handle the bug frame won't be able to find the instruction pointer that
> triggered the #UD in any of the registered virtual regions, and hence crash.
>
> Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
> and only remove them once the payload is unloaded.  This requires some changes
> to the virtual regions code, as the removal of the virtual regions is no longer
> done in stop machine context, and hence an RCU barrier is added in order to
> make sure there are no users of the virtual region after it's been removed from
> the list.
>
> Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/common/livepatch.c      |  5 +++--
>  xen/common/virtual_region.c | 40 +++++++++++--------------------------
>  2 files changed, 15 insertions(+), 30 deletions(-)
>
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index 1209fea2566c..3199432f11f5 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -942,6 +942,8 @@ static int prepare_payload(struct payload *payload,
>          }
>      }
>
> +    register_virtual_region(region);
> +
>      return 0;
>  }
>

The region is registered in prepare_payload() but if e.g. the build id
check in load_payload_data() fails, it won't unregister the region
since the failure path calls free_payload_data(), not free_payload().
Perhaps the call to register_virtual_region() could be done as the last
thing in load_payload_data()?

Ross
Roger Pau Monne Feb. 23, 2024, 2:55 p.m. UTC | #5
On Fri, Feb 23, 2024 at 09:45:15AM +0000, Ross Lagerwall wrote:
> On Thu, Nov 30, 2023 at 2:30 PM Roger Pau Monne <roger.pau@citrix.com> wrote:
> >
> > Currently livepatch regions are registered as virtual regions only after the
> > livepatch has been applied.
> >
> > This can lead to issues when using the pre-apply or post-revert hooks, as at
> > the point the livepatch is not in the virtual regions list.  If a livepatch
> > pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the
> > code to handle the bug frame won't be able to find the instruction pointer that
> > triggered the #UD in any of the registered virtual regions, and hence crash.
> >
> > Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
> > and only remove them once the payload is unloaded.  This requires some changes
> > to the virtual regions code, as the removal of the virtual regions is no longer
> > done in stop machine context, and hence an RCU barrier is added in order to
> > make sure there are no users of the virtual region after it's been removed from
> > the list.
> >
> > Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> >  xen/common/livepatch.c      |  5 +++--
> >  xen/common/virtual_region.c | 40 +++++++++++--------------------------
> >  2 files changed, 15 insertions(+), 30 deletions(-)
> >
> > diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> > index 1209fea2566c..3199432f11f5 100644
> > --- a/xen/common/livepatch.c
> > +++ b/xen/common/livepatch.c
> > @@ -942,6 +942,8 @@ static int prepare_payload(struct payload *payload,
> >          }
> >      }
> >
> > +    register_virtual_region(region);
> > +
> >      return 0;
> >  }
> >
> 
> The region is registered in prepare_payload() but if e.g. the build id
> check in load_payload_data() fails, it won't unregister the region
> since the failure path calls free_payload_data(), not free_payload().
> Perhaps the call to register_virtual_region() could be done as the last
> thing in load_payload_data()?

I've moved it to livepatch_upload(), as I think it's more natural to
be done together with the addition to the list of livepatches.  Thanks
for spotting it, I guess I got confused between free_payload_data() and
free_payload().

Roger.
diff mbox series

Patch

diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index 1209fea2566c..3199432f11f5 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -942,6 +942,8 @@  static int prepare_payload(struct payload *payload,
         }
     }
 
+    register_virtual_region(region);
+
     return 0;
 }
 
@@ -1071,6 +1073,7 @@  static int build_symbol_table(struct payload *payload,
 static void free_payload(struct payload *data)
 {
     ASSERT(spin_is_locked(&payload_lock));
+    unregister_virtual_region(&data->region);
     list_del(&data->list);
     payload_cnt--;
     payload_version++;
@@ -1386,7 +1389,6 @@  static inline void apply_payload_tail(struct payload *data)
      * The applied_list is iterated by the trap code.
      */
     list_add_tail_rcu(&data->applied_list, &applied_list);
-    register_virtual_region(&data->region);
 
     data->state = LIVEPATCH_STATE_APPLIED;
 }
@@ -1432,7 +1434,6 @@  static inline void revert_payload_tail(struct payload *data)
      * The applied_list is iterated by the trap code.
      */
     list_del_rcu(&data->applied_list);
-    unregister_virtual_region(&data->region);
 
     data->reverted = true;
     data->state = LIVEPATCH_STATE_CHECKED;
diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
index 5f89703f513b..b444253848cf 100644
--- a/xen/common/virtual_region.c
+++ b/xen/common/virtual_region.c
@@ -23,14 +23,8 @@  static struct virtual_region core_init __initdata = {
 };
 
 /*
- * RCU locking. Additions are done either at startup (when there is only
- * one CPU) or when all CPUs are running without IRQs.
- *
- * Deletions are bit tricky. We do it when Live Patch (all CPUs running
- * without IRQs) or during bootup (when clearing the init).
- *
- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
- * on deletion.
+ * RCU locking. Modifications to the list must be done in exclusive mode, and
+ * hence need to hold the spinlock.
  *
  * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
  */
@@ -58,38 +52,28 @@  const struct virtual_region *find_text_region(unsigned long addr)
 
 void register_virtual_region(struct virtual_region *r)
 {
-    ASSERT(!local_irq_is_enabled());
+    unsigned long flags;
 
+    spin_lock_irqsave(&virtual_region_lock, flags);
     list_add_tail_rcu(&r->list, &virtual_region_list);
+    spin_unlock_irqrestore(&virtual_region_lock, flags);
 }
 
 static void remove_virtual_region(struct virtual_region *r)
 {
-    unsigned long flags;
+     unsigned long flags;
 
-    spin_lock_irqsave(&virtual_region_lock, flags);
-    list_del_rcu(&r->list);
-    spin_unlock_irqrestore(&virtual_region_lock, flags);
-    /*
-     * We do not need to invoke call_rcu.
-     *
-     * This is due to the fact that on the deletion we have made sure
-     * to use spinlocks (to guard against somebody else calling
-     * unregister_virtual_region) and list_deletion spiced with
-     * memory barrier.
-     *
-     * That protects us from corrupting the list as the readers all
-     * use list_for_each_entry_rcu which is safe against concurrent
-     * deletions.
-     */
+     spin_lock_irqsave(&virtual_region_lock, flags);
+     list_del_rcu(&r->list);
+     spin_unlock_irqrestore(&virtual_region_lock, flags);
 }
 
 void unregister_virtual_region(struct virtual_region *r)
 {
-    /* Expected to be called from Live Patch - which has IRQs disabled. */
-    ASSERT(!local_irq_is_enabled());
-
     remove_virtual_region(r);
+
+    /* Assert that no CPU might be using the removed region. */
+    rcu_barrier();
 }
 
 #if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)