diff mbox series

[PATCHv2,net,1/3] bonding: move mutex lock to a work queue for XFRM GC tasks

Message ID 20250225094049.20142-2-liuhangbin@gmail.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series bond: fix xfrm offload issues | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0

Commit Message

Hangbin Liu Feb. 25, 2025, 9:40 a.m. UTC
The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
a warning like:

BUG: sleeping function called from invalid context at...

Fix this by moving the mutex_lock() operation to a work queue.

Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 drivers/net/bonding/bond_main.c | 41 +++++++++++++++++++++++++--------
 include/net/bonding.h           |  6 +++++
 2 files changed, 37 insertions(+), 10 deletions(-)

Comments

Nikolay Aleksandrov Feb. 25, 2025, 11:05 a.m. UTC | #1
On 2/25/25 11:40, Hangbin Liu wrote:
> The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
> a warning like:
> 
> BUG: sleeping function called from invalid context at...
> 
> Fix this by moving the mutex_lock() operation to a work queue.
> 
> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
> Reported-by: Jakub Kicinski <kuba@kernel.org>
> Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  drivers/net/bonding/bond_main.c | 41 +++++++++++++++++++++++++--------
>  include/net/bonding.h           |  6 +++++
>  2 files changed, 37 insertions(+), 10 deletions(-)
> 

Hi,
I think there are a few issues with this solution, comments below.

> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index e45bba240cbc..cc7064aa4b35 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -551,6 +551,25 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
>  	mutex_unlock(&bond->ipsec_lock);
>  }
>  
> +static void bond_xfrm_state_gc_work(struct work_struct *work)
> +{
> +	struct bond_xfrm_work *xfrm_work = container_of(work, struct bond_xfrm_work, work);
> +	struct bonding *bond = xfrm_work->bond;
> +	struct xfrm_state *xs = xfrm_work->xs;
> +	struct bond_ipsec *ipsec;
> +
> +	mutex_lock(&bond->ipsec_lock);
> +	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> +		if (ipsec->xs == xs) {
> +			list_del(&ipsec->list);
> +			kfree(ipsec);
> +			xfrm_state_put(xs);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&bond->ipsec_lock);
> +}
> +
>  /**
>   * bond_ipsec_del_sa - clear out this specific SA
>   * @xs: pointer to transformer state struct
> @@ -558,9 +577,9 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
>  static void bond_ipsec_del_sa(struct xfrm_state *xs)
>  {
>  	struct net_device *bond_dev = xs->xso.dev;
> +	struct bond_xfrm_work *xfrm_work;
>  	struct net_device *real_dev;
>  	netdevice_tracker tracker;
> -	struct bond_ipsec *ipsec;
>  	struct bonding *bond;
>  	struct slave *slave;
>  
> @@ -592,15 +611,17 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>  out:
>  	netdev_put(real_dev, &tracker);
> -	mutex_lock(&bond->ipsec_lock);
> -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> -		if (ipsec->xs == xs) {
> -			list_del(&ipsec->list);
> -			kfree(ipsec);
> -			break;
> -		}
> -	}
> -	mutex_unlock(&bond->ipsec_lock);
> +
> +	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
> +	if (!xfrm_work)
> +		return;
> +

What happens if this allocation fails? I think you'll leak memory and
potentially call the xdo_dev callbacks for this xs again because it's
still in the list. Also this xfrm_work memory doesn't get freed anywhere, so
you're leaking it as well.

Perhaps you can do this allocation in add_sa, it seems you can sleep
there and potentially return an error if it fails, so this can never
fail later. You'll have to be careful with the freeing dance though.
Alternatively, make the work a part of struct bond so it doesn't need
memory management, but then you need a mechanism to queue these items (e.g.
a separate list with a spinlock) and would have more complexity with freeing
in parallel.

> +	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
> +	xfrm_work->bond = bond;
> +	xfrm_work->xs = xs;
> +	xfrm_state_hold(xs);
> +
> +	queue_work(bond->wq, &xfrm_work->work);

Note that nothing waits for this work anywhere and .ndo_uninit runs before
bond's .priv_destructor which means ipsec_lock will be destroyed and will be
used afterwards when destroying bond->wq from the destructor if there were
any queued works.

[snip]

Cheers,
 Nik
Hangbin Liu Feb. 25, 2025, 1:13 p.m. UTC | #2
On Tue, Feb 25, 2025 at 01:05:24PM +0200, Nikolay Aleksandrov wrote:
> > @@ -592,15 +611,17 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
> >  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
> >  out:
> >  	netdev_put(real_dev, &tracker);
> > -	mutex_lock(&bond->ipsec_lock);
> > -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> > -		if (ipsec->xs == xs) {
> > -			list_del(&ipsec->list);
> > -			kfree(ipsec);
> > -			break;
> > -		}
> > -	}
> > -	mutex_unlock(&bond->ipsec_lock);
> > +
> > +	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
> > +	if (!xfrm_work)
> > +		return;
> > +
> 
> What happens if this allocation fails? I think you'll leak memory and
> potentially call the xdo_dev callbacks for this xs again because it's
> still in the list. Also this xfrm_work memory doesn't get freed anywhere, so
> you're leaking it as well.

Yes, I thought this too simply and forgot free the memory.
> 
> Perhaps you can do this allocation in add_sa, it seems you can sleep
> there and potentially return an error if it fails, so this can never
> fail later. You'll have to be careful with the freeing dance though.

Hmm, if we allocation this in add_sa, how to we get the xfrm_work
in del_sa? Add the xfrm_work to another list will need to sleep again
to find it out in del_sa.

> Alternatively, make the work a part of struct bond so it doesn't need
> memory management, but then you need a mechanism to queue these items (e.g.
> a separate list with a spinlock) and would have more complexity with freeing
> in parallel.

I used a dealy work queue in bond for my draft patch. As you said,
it need another list to queue the xs. And during the gc works, we need
to use spinlock again to get the xs out...

> 
> > +	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
> > +	xfrm_work->bond = bond;
> > +	xfrm_work->xs = xs;
> > +	xfrm_state_hold(xs);
> > +
> > +	queue_work(bond->wq, &xfrm_work->work);
> 
> Note that nothing waits for this work anywhere and .ndo_uninit runs before
> bond's .priv_destructor which means ipsec_lock will be destroyed and will be
> used afterwards when destroying bond->wq from the destructor if there were
> any queued works.

Do you mean we need to register the work queue in bond_init and cancel
it in bond_work_cancel_all()?

Thanks
Hangbin
Nikolay Aleksandrov Feb. 25, 2025, 1:30 p.m. UTC | #3
On 2/25/25 15:13, Hangbin Liu wrote:
> On Tue, Feb 25, 2025 at 01:05:24PM +0200, Nikolay Aleksandrov wrote:
>>> @@ -592,15 +611,17 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>>>  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>>>  out:
>>>  	netdev_put(real_dev, &tracker);
>>> -	mutex_lock(&bond->ipsec_lock);
>>> -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>>> -		if (ipsec->xs == xs) {
>>> -			list_del(&ipsec->list);
>>> -			kfree(ipsec);
>>> -			break;
>>> -		}
>>> -	}
>>> -	mutex_unlock(&bond->ipsec_lock);
>>> +
>>> +	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
>>> +	if (!xfrm_work)
>>> +		return;
>>> +
>>
>> What happens if this allocation fails? I think you'll leak memory and
>> potentially call the xdo_dev callbacks for this xs again because it's
>> still in the list. Also this xfrm_work memory doesn't get freed anywhere, so
>> you're leaking it as well.
> 
> Yes, I thought this too simply and forgot free the memory.
>>
>> Perhaps you can do this allocation in add_sa, it seems you can sleep
>> there and potentially return an error if it fails, so this can never
>> fail later. You'll have to be careful with the freeing dance though.
> 
> Hmm, if we allocation this in add_sa, how to we get the xfrm_work
> in del_sa? Add the xfrm_work to another list will need to sleep again
> to find it out in del_sa.
> 

Well, you have struct bond_ipsec and it is tied with the work's lifetime
so you can stick it there. :)
I haven't looked closely how feasible it is.

>> Alternatively, make the work a part of struct bond so it doesn't need
>> memory management, but then you need a mechanism to queue these items (e.g.
>> a separate list with a spinlock) and would have more complexity with freeing
>> in parallel.
> 
> I used a dealy work queue in bond for my draft patch. As you said,
> it need another list to queue the xs. And during the gc works, we need
> to use spinlock again to get the xs out...
> 

Correct, it's a different kind of mess. :)

>>
>>> +	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
>>> +	xfrm_work->bond = bond;
>>> +	xfrm_work->xs = xs;
>>> +	xfrm_state_hold(xs);
>>> +
>>> +	queue_work(bond->wq, &xfrm_work->work);
>>
>> Note that nothing waits for this work anywhere and .ndo_uninit runs before
>> bond's .priv_destructor which means ipsec_lock will be destroyed and will be
>> used afterwards when destroying bond->wq from the destructor if there were
>> any queued works.
> 
> Do you mean we need to register the work queue in bond_init and cancel
> it in bond_work_cancel_all()?
> 
> Thanks
> Hangbin

That is one way, the other is if you have access to the work queue items then
you can cancel them which should be easier (i.e. cancel_delayed_work_sync).

Regardless of which way you choose to solve this (gc or work in bond_ipsec), there will
be some dance to be done for the sequence of events that will not be straight-forward.

Cheers,
 Nik
Cosmin Ratiu Feb. 25, 2025, 2 p.m. UTC | #4
On Tue, 2025-02-25 at 09:40 +0000, Hangbin Liu wrote:
> The fixed commit placed mutex_lock() inside spin_lock_bh(), which
> triggers
> a warning like:
> 
> BUG: sleeping function called from invalid context at...
> 
> Fix this by moving the mutex_lock() operation to a work queue.
> 
> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to
> mutex")
> Reported-by: Jakub Kicinski <kuba@kernel.org>
> Closes:
> https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  drivers/net/bonding/bond_main.c | 41 +++++++++++++++++++++++++------
> --
>  include/net/bonding.h           |  6 +++++
>  2 files changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c
> b/drivers/net/bonding/bond_main.c
> index e45bba240cbc..cc7064aa4b35 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -551,6 +551,25 @@ static void bond_ipsec_add_sa_all(struct bonding
> *bond)
>  	mutex_unlock(&bond->ipsec_lock);
>  }
>  
> +static void bond_xfrm_state_gc_work(struct work_struct *work)
> +{
> +	struct bond_xfrm_work *xfrm_work = container_of(work, struct
> bond_xfrm_work, work);
> +	struct bonding *bond = xfrm_work->bond;
> +	struct xfrm_state *xs = xfrm_work->xs;
> +	struct bond_ipsec *ipsec;
> +
> +	mutex_lock(&bond->ipsec_lock);
> +	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> +		if (ipsec->xs == xs) {
> +			list_del(&ipsec->list);
> +			kfree(ipsec);
> +			xfrm_state_put(xs);

I would expect xfrm_state_put to be called from outside the loop,
regardless of whether an entry is found in the list or not, because it
was unconditionally referenced when the work was created.

> +			break;
> +		}
> +	}
> +	mutex_unlock(&bond->ipsec_lock);
> +}
> +
>  /**
>   * bond_ipsec_del_sa - clear out this specific SA
>   * @xs: pointer to transformer state struct
> @@ -558,9 +577,9 @@ static void bond_ipsec_add_sa_all(struct bonding
> *bond)
>  static void bond_ipsec_del_sa(struct xfrm_state *xs)
>  {
>  	struct net_device *bond_dev = xs->xso.dev;
> +	struct bond_xfrm_work *xfrm_work;
>  	struct net_device *real_dev;
>  	netdevice_tracker tracker;
> -	struct bond_ipsec *ipsec;
>  	struct bonding *bond;
>  	struct slave *slave;
>  
> @@ -592,15 +611,17 @@ static void bond_ipsec_del_sa(struct xfrm_state
> *xs)
>  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>  out:
>  	netdev_put(real_dev, &tracker);
> -	mutex_lock(&bond->ipsec_lock);
> -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> -		if (ipsec->xs == xs) {
> -			list_del(&ipsec->list);
> -			kfree(ipsec);
> -			break;
> -		}
> -	}
> -	mutex_unlock(&bond->ipsec_lock);
> +
> +	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
> +	if (!xfrm_work)
> +		return;
> +
> +	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
> +	xfrm_work->bond = bond;
> +	xfrm_work->xs = xs;
> +	xfrm_state_hold(xs);
> +
> +	queue_work(bond->wq, &xfrm_work->work);
>  }
>  
>  static void bond_ipsec_del_sa_all(struct bonding *bond)
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index 8bb5f016969f..d54ba5e3affb 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -209,6 +209,12 @@ struct bond_ipsec {
>  	struct xfrm_state *xs;
>  };
>  
> +struct bond_xfrm_work {
> +	struct work_struct work;
> +	struct bonding *bond;
> +	struct xfrm_state *xs;
> +};

Also, like Nikolai said, something needs to wait on all in-flight work
items.

This got me to stare at the code again. What if we move the removal of
the xs from bond->ipsec from bond_ipsec_del_sa to bond_ipsec_free_sa?
bond_ipsec_free_sa, unlike bond_ipsec_del_sa, is not called with x-
>lock held. It is called from the xfrm gc task or directly via
xfrm_state_put_sync and therefore wouldn't suffer from the locking
issue.

The tricky part is to make sure that inactive bond->ipsec entries
(after bond_ipsec_del_sa calls) do not cause issues if there's a
migration (bond_ipsec_del_sa_all is called) happening before
bond_ipsec_free_sa. Perhaps filtering by x->km.state != XFRM_STATE_DEAD
in bond_ipsec_del_sa_all.

What do you think about this idea?

Cosmin.
Nikolay Aleksandrov Feb. 25, 2025, 2:27 p.m. UTC | #5
On 2/25/25 16:00, Cosmin Ratiu wrote:
> On Tue, 2025-02-25 at 09:40 +0000, Hangbin Liu wrote:
>> The fixed commit placed mutex_lock() inside spin_lock_bh(), which
>> triggers
>> a warning like:
>>
>> BUG: sleeping function called from invalid context at...
>>
>> Fix this by moving the mutex_lock() operation to a work queue.
>>
>> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to
>> mutex")
>> Reported-by: Jakub Kicinski <kuba@kernel.org>
>> Closes:
>> https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
>> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
>> ---
>>  drivers/net/bonding/bond_main.c | 41 +++++++++++++++++++++++++------
>> --
>>  include/net/bonding.h           |  6 +++++
>>  2 files changed, 37 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c
>> b/drivers/net/bonding/bond_main.c
>> index e45bba240cbc..cc7064aa4b35 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -551,6 +551,25 @@ static void bond_ipsec_add_sa_all(struct bonding
>> *bond)
>>  	mutex_unlock(&bond->ipsec_lock);
>>  }
>>  
>> +static void bond_xfrm_state_gc_work(struct work_struct *work)
>> +{
>> +	struct bond_xfrm_work *xfrm_work = container_of(work, struct
>> bond_xfrm_work, work);
>> +	struct bonding *bond = xfrm_work->bond;
>> +	struct xfrm_state *xs = xfrm_work->xs;
>> +	struct bond_ipsec *ipsec;
>> +
>> +	mutex_lock(&bond->ipsec_lock);
>> +	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> +		if (ipsec->xs == xs) {
>> +			list_del(&ipsec->list);
>> +			kfree(ipsec);
>> +			xfrm_state_put(xs);
> 
> I would expect xfrm_state_put to be called from outside the loop,
> regardless of whether an entry is found in the list or not, because it
> was unconditionally referenced when the work was created.
> 
>> +			break;
>> +		}
>> +	}
>> +	mutex_unlock(&bond->ipsec_lock);
>> +}
>> +
>>  /**
>>   * bond_ipsec_del_sa - clear out this specific SA
>>   * @xs: pointer to transformer state struct
>> @@ -558,9 +577,9 @@ static void bond_ipsec_add_sa_all(struct bonding
>> *bond)
>>  static void bond_ipsec_del_sa(struct xfrm_state *xs)
>>  {
>>  	struct net_device *bond_dev = xs->xso.dev;
>> +	struct bond_xfrm_work *xfrm_work;
>>  	struct net_device *real_dev;
>>  	netdevice_tracker tracker;
>> -	struct bond_ipsec *ipsec;
>>  	struct bonding *bond;
>>  	struct slave *slave;
>>  
>> @@ -592,15 +611,17 @@ static void bond_ipsec_del_sa(struct xfrm_state
>> *xs)
>>  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>>  out:
>>  	netdev_put(real_dev, &tracker);
>> -	mutex_lock(&bond->ipsec_lock);
>> -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> -		if (ipsec->xs == xs) {
>> -			list_del(&ipsec->list);
>> -			kfree(ipsec);
>> -			break;
>> -		}
>> -	}
>> -	mutex_unlock(&bond->ipsec_lock);
>> +
>> +	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
>> +	if (!xfrm_work)
>> +		return;
>> +
>> +	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
>> +	xfrm_work->bond = bond;
>> +	xfrm_work->xs = xs;
>> +	xfrm_state_hold(xs);
>> +
>> +	queue_work(bond->wq, &xfrm_work->work);
>>  }
>>  
>>  static void bond_ipsec_del_sa_all(struct bonding *bond)
>> diff --git a/include/net/bonding.h b/include/net/bonding.h
>> index 8bb5f016969f..d54ba5e3affb 100644
>> --- a/include/net/bonding.h
>> +++ b/include/net/bonding.h
>> @@ -209,6 +209,12 @@ struct bond_ipsec {
>>  	struct xfrm_state *xs;
>>  };
>>  
>> +struct bond_xfrm_work {
>> +	struct work_struct work;
>> +	struct bonding *bond;
>> +	struct xfrm_state *xs;
>> +};
> 
> Also, like Nikolai said, something needs to wait on all in-flight work
> items.
> 
> This got me to stare at the code again. What if we move the removal of
> the xs from bond->ipsec from bond_ipsec_del_sa to bond_ipsec_free_sa?
> bond_ipsec_free_sa, unlike bond_ipsec_del_sa, is not called with x-
>> lock held. It is called from the xfrm gc task or directly via
> xfrm_state_put_sync and therefore wouldn't suffer from the locking
> issue.
> 
> The tricky part is to make sure that inactive bond->ipsec entries
> (after bond_ipsec_del_sa calls) do not cause issues if there's a
> migration (bond_ipsec_del_sa_all is called) happening before
> bond_ipsec_free_sa. Perhaps filtering by x->km.state != XFRM_STATE_DEAD
> in bond_ipsec_del_sa_all.
> 
> What do you think about this idea?
> 
> Cosmin.

I know the question was for Hangbin, but I do like this solution. I missed
the xdo_dev_state_free callback, it could lead to a much simpler solution
with some care.

Cheers,
 Nik
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e45bba240cbc..cc7064aa4b35 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -551,6 +551,25 @@  static void bond_ipsec_add_sa_all(struct bonding *bond)
 	mutex_unlock(&bond->ipsec_lock);
 }
 
+static void bond_xfrm_state_gc_work(struct work_struct *work)
+{
+	struct bond_xfrm_work *xfrm_work = container_of(work, struct bond_xfrm_work, work);
+	struct bonding *bond = xfrm_work->bond;
+	struct xfrm_state *xs = xfrm_work->xs;
+	struct bond_ipsec *ipsec;
+
+	mutex_lock(&bond->ipsec_lock);
+	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+		if (ipsec->xs == xs) {
+			list_del(&ipsec->list);
+			kfree(ipsec);
+			xfrm_state_put(xs);
+			break;
+		}
+	}
+	mutex_unlock(&bond->ipsec_lock);
+}
+
 /**
  * bond_ipsec_del_sa - clear out this specific SA
  * @xs: pointer to transformer state struct
@@ -558,9 +577,9 @@  static void bond_ipsec_add_sa_all(struct bonding *bond)
 static void bond_ipsec_del_sa(struct xfrm_state *xs)
 {
 	struct net_device *bond_dev = xs->xso.dev;
+	struct bond_xfrm_work *xfrm_work;
 	struct net_device *real_dev;
 	netdevice_tracker tracker;
-	struct bond_ipsec *ipsec;
 	struct bonding *bond;
 	struct slave *slave;
 
@@ -592,15 +611,17 @@  static void bond_ipsec_del_sa(struct xfrm_state *xs)
 	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
 out:
 	netdev_put(real_dev, &tracker);
-	mutex_lock(&bond->ipsec_lock);
-	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
-		if (ipsec->xs == xs) {
-			list_del(&ipsec->list);
-			kfree(ipsec);
-			break;
-		}
-	}
-	mutex_unlock(&bond->ipsec_lock);
+
+	xfrm_work = kmalloc(sizeof(*xfrm_work), GFP_ATOMIC);
+	if (!xfrm_work)
+		return;
+
+	INIT_WORK(&xfrm_work->work, bond_xfrm_state_gc_work);
+	xfrm_work->bond = bond;
+	xfrm_work->xs = xs;
+	xfrm_state_hold(xs);
+
+	queue_work(bond->wq, &xfrm_work->work);
 }
 
 static void bond_ipsec_del_sa_all(struct bonding *bond)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 8bb5f016969f..d54ba5e3affb 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -209,6 +209,12 @@  struct bond_ipsec {
 	struct xfrm_state *xs;
 };
 
+struct bond_xfrm_work {
+	struct work_struct work;
+	struct bonding *bond;
+	struct xfrm_state *xs;
+};
+
 /*
  * Here are the locking policies for the two bonding locks:
  * Get rcu_read_lock when reading or RTNL when writing slave list.