diff mbox

[v2] rbd: unregister watch before cancel_tasks

Message ID 1528107877-2264-1-git-send-email-dongsheng.yang@easystack.cn (mailing list archive)
State New, archived
Headers show

Commit Message

Dongsheng Yang June 4, 2018, 10:24 a.m. UTC
There is a problem if we are going to unmap a rbd device and the watch_dwork is going to queue delayed work for watch;

unmap Thread							watch Thread						   timer
do_rbd_remove
  |									|							|
  |-- test_and_set_bit(RBD_DEV_FLAG_REMOVING)				|							|
  |									|							|
  |-- cancel_tasks_sync(rbd_dev)					|							|
  |									|-- queue_delayed_work for watch			|
  |									|							|
  |-- destroy_workqueue(rbd_dev->task_wq);				|							|
           	|							|							|
           	|-- drain_workqueue(wq);				|							|
		|-- destroy other resources in wq			|							|
		|							|							|
		|							|							|-- call_timer_fn
		|														|
																	|-- __queue_work()
Then the delayed work escape the cancel_tasks_sync() and destroy_workqueue() and we will get an user-after-free call trace:

[  549.932085] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[  549.934134] PGD 0 P4D 0
[  549.935145] Oops: 0000 [#1] SMP PTI
[  549.936283] Modules linked in: rbd(OE) libceph(OE) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag dns_resolver ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sg cfg80211 rfkill snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core pcbc snd_hwdep snd_seq mbcache aesni_intel snd_seq_device jbd2 crypto_simd nfsd cryptd glue_helper snd_pcm snd_timer auth_rpcgss pcspkr snd virtio_balloon nfs_acl soundcore i2c_piix4 lockd grace sunrpc ip_tables xfs libcrc32c virtio_console virtio_blk ata_generic pata_acpi 8139too qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix libata crc32c_intel virtio_pci 8139cp virtio_ring i2c_core mii virtio floppy serio_raw dm_mirror dm_region_hash
[  549.951835]  dm_log dm_mod dax [last unloaded: libceph]
[  549.953490] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G           OE     4.17.0-rc6+ #13
[  549.955502] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  549.957246] RIP: 0010:__queue_work+0x6a/0x3b0
[  549.958744] RSP: 0018:ffff9427df1c3e90 EFLAGS: 00010086
[  549.960374] RAX: ffff9427deca8400 RBX: 0000000000000000 RCX: 0000000000000000
[  549.962297] RDX: ffff9427deca8400 RSI: ffff9427df1c3e50 RDI: 0000000000000000
[  549.964216] RBP: ffff942783e39e00 R08: ffff9427deca8400 R09: ffff9427df1c3f00
[  549.966136] R10: 0000000000000004 R11: 0000000000000005 R12: ffff9427cfb85970
[  549.968070] R13: 0000000000002000 R14: 000000000001eca0 R15: 0000000000000007
[  549.969999] FS:  0000000000000000(0000) GS:ffff9427df1c0000(0000) knlGS:0000000000000000
[  549.972069] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  549.973775] CR2: 0000000000000000 CR3: 00000004c900a005 CR4: 00000000000206e0
[  549.975695] Call Trace:
[  549.976900]  <IRQ>
[  549.978033]  ? __queue_work+0x3b0/0x3b0
[  549.979442]  call_timer_fn+0x2d/0x130
[  549.980824]  run_timer_softirq+0x16e/0x430
[  549.982263]  ? tick_sched_timer+0x37/0x70
[  549.983691]  __do_softirq+0xd2/0x280
[  549.985035]  irq_exit+0xd5/0xe0
[  549.986316]  smp_apic_timer_interrupt+0x6c/0x130
[  549.987835]  apic_timer_interrupt+0xf/0x20

This patch move the cancel_tasks_sync() after unregister_watch(), then the delayed_work will be cancelled by cancel_tasks_sync().

Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Ilya Dryomov June 5, 2018, 7:53 a.m. UTC | #1
On Mon, Jun 4, 2018 at 12:24 PM, Dongsheng Yang
<dongsheng.yang@easystack.cn> wrote:
> There is a problem if we are going to unmap a rbd device and the watch_dwork is going to queue delayed work for watch;
>
> unmap Thread                                                    watch Thread                                               timer
> do_rbd_remove
>   |                                                                     |                                                       |
>   |-- test_and_set_bit(RBD_DEV_FLAG_REMOVING)                           |                                                       |
>   |                                                                     |                                                       |
>   |-- cancel_tasks_sync(rbd_dev)                                        |                                                       |
>   |                                                                     |-- queue_delayed_work for watch                        |
>   |                                                                     |                                                       |
>   |-- destroy_workqueue(rbd_dev->task_wq);                              |                                                       |
>                 |                                                       |                                                       |
>                 |-- drain_workqueue(wq);                                |                                                       |
>                 |-- destroy other resources in wq                       |                                                       |
>                 |                                                       |                                                       |
>                 |                                                       |                                                       |-- call_timer_fn
>                 |                                                                                                               |
>                                                                                                                                         |-- __queue_work()
> Then the delayed work escape the cancel_tasks_sync() and destroy_workqueue() and we will get an user-after-free call trace:
>
> [  549.932085] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
> [  549.934134] PGD 0 P4D 0
> [  549.935145] Oops: 0000 [#1] SMP PTI
> [  549.936283] Modules linked in: rbd(OE) libceph(OE) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag dns_resolver ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sg cfg80211 rfkill snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core pcbc snd_hwdep snd_seq mbcache aesni_intel snd_seq_device jbd2 crypto_simd nfsd cryptd glue_helper snd_pcm snd_timer auth_rpcgss pcspkr snd virtio_balloon nfs_acl soundcore i2c_piix4 lockd grace sunrpc ip_tables xfs libcrc32c virtio_console virtio_blk ata_generic pata_acpi 8139too qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix libata crc32c_intel virtio_pci 8139cp virtio_ring i2c_core mii virtio floppy serio_raw dm_mirror dm_region_hash
> [  549.951835]  dm_log dm_mod dax [last unloaded: libceph]
> [  549.953490] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G           OE     4.17.0-rc6+ #13
> [  549.955502] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
> [  549.957246] RIP: 0010:__queue_work+0x6a/0x3b0
> [  549.958744] RSP: 0018:ffff9427df1c3e90 EFLAGS: 00010086
> [  549.960374] RAX: ffff9427deca8400 RBX: 0000000000000000 RCX: 0000000000000000
> [  549.962297] RDX: ffff9427deca8400 RSI: ffff9427df1c3e50 RDI: 0000000000000000
> [  549.964216] RBP: ffff942783e39e00 R08: ffff9427deca8400 R09: ffff9427df1c3f00
> [  549.966136] R10: 0000000000000004 R11: 0000000000000005 R12: ffff9427cfb85970
> [  549.968070] R13: 0000000000002000 R14: 000000000001eca0 R15: 0000000000000007
> [  549.969999] FS:  0000000000000000(0000) GS:ffff9427df1c0000(0000) knlGS:0000000000000000
> [  549.972069] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  549.973775] CR2: 0000000000000000 CR3: 00000004c900a005 CR4: 00000000000206e0
> [  549.975695] Call Trace:
> [  549.976900]  <IRQ>
> [  549.978033]  ? __queue_work+0x3b0/0x3b0
> [  549.979442]  call_timer_fn+0x2d/0x130
> [  549.980824]  run_timer_softirq+0x16e/0x430
> [  549.982263]  ? tick_sched_timer+0x37/0x70
> [  549.983691]  __do_softirq+0xd2/0x280
> [  549.985035]  irq_exit+0xd5/0xe0
> [  549.986316]  smp_apic_timer_interrupt+0x6c/0x130
> [  549.987835]  apic_timer_interrupt+0xf/0x20
>
> This patch move the cancel_tasks_sync() after unregister_watch(), then the delayed_work will be cancelled by cancel_tasks_sync().
>
> Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
> ---
>  drivers/block/rbd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 2b4e90d..1f33f3c 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -3410,7 +3410,6 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
>  static void rbd_unregister_watch(struct rbd_device *rbd_dev)
>  {
>         WARN_ON(waitqueue_active(&rbd_dev->lock_waitq));
> -       cancel_tasks_sync(rbd_dev);
>
>         mutex_lock(&rbd_dev->watch_mutex);
>         if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
> @@ -3418,6 +3417,7 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
>         rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
>         mutex_unlock(&rbd_dev->watch_mutex);
>
> +       cancel_tasks_sync(rbd_dev);
>         ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
>  }
>

Applied, with a change so that just ->watch_dwork is moved.  I think
it's more logical to keep cancelling exclusive lock related functions
before unregistering the watch.

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 2b4e90d..1f33f3c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3410,7 +3410,6 @@  static void cancel_tasks_sync(struct rbd_device *rbd_dev)
 static void rbd_unregister_watch(struct rbd_device *rbd_dev)
 {
 	WARN_ON(waitqueue_active(&rbd_dev->lock_waitq));
-	cancel_tasks_sync(rbd_dev);
 
 	mutex_lock(&rbd_dev->watch_mutex);
 	if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
@@ -3418,6 +3417,7 @@  static void rbd_unregister_watch(struct rbd_device *rbd_dev)
 	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
 	mutex_unlock(&rbd_dev->watch_mutex);
 
+	cancel_tasks_sync(rbd_dev);
 	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
 }