genhd: Do not hold event lock when scheduling workqueue elements

Message ID 1484732896-22941-1-git-send-email-hare@suse.de (mailing list archive)
State New, archived

Commit Message

Hannes Reinecke Jan. 18, 2017, 9:48 a.m. UTC
When scheduling workqueue elements, the callback function might be called
directly, so holding the event lock is potentially dangerous as it might
lead to a deadlock.
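
A minimal sketch of the dangerous pattern (hypothetical names, not the
actual genhd code), assuming the work callback takes the same lock that is
held across the synchronous cancel:

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(demo_lock);		/* hypothetical */

static void demo_workfn(struct work_struct *work)
{
	mutex_lock(&demo_lock);		/* the callback needs demo_lock */
	/* ... event processing ... */
	mutex_unlock(&demo_lock);
}
static DECLARE_DELAYED_WORK(demo_dwork, demo_workfn);

static void demo_block(void)
{
	mutex_lock(&demo_lock);
	/*
	 * Deadlock risk: this waits for demo_workfn() to finish, but a
	 * running demo_workfn() may be waiting for demo_lock, which we hold.
	 */
	cancel_delayed_work_sync(&demo_dwork);
	mutex_unlock(&demo_lock);
}

The resulting hung task trace: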

[  989.542827] INFO: task systemd-udevd:459 blocked for more than 480 seconds.
[  989.609721]       Not tainted 4.10.0-rc4+ #546
[  989.648545] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[  989.716429] systemd-udevd   D13368   459      1 0x00000004
[  989.716435] Call Trace:
[  989.716444]  __schedule+0x2f2/0xb10
[  989.716447]  schedule+0x3d/0x90
[  989.716449]  schedule_timeout+0x2fc/0x600
[  989.716451]  ? wait_for_completion+0xac/0x110
[  989.716456]  ? mark_held_locks+0x66/0x90
[  989.716458]  ? _raw_spin_unlock_irq+0x2c/0x40
[  989.716460]  ? trace_hardirqs_on_caller+0x111/0x1e0
[  989.716461]  wait_for_completion+0xb4/0x110
[  989.716464]  ? wake_up_q+0x80/0x80
[  989.716469]  flush_work+0x1ea/0x2a0
[  989.716470]  ? flush_work+0x24e/0x2a0
[  989.716472]  ? destroy_worker+0xd0/0xd0
[  989.716474]  __cancel_work_timer+0x11a/0x1e0
[  989.716476]  ? trace_hardirqs_on_caller+0x111/0x1e0
[  989.716477]  cancel_delayed_work_sync+0x13/0x20
[  989.716482]  disk_block_events+0x82/0x90
[  989.716487]  __blkdev_get+0x58/0x450
[  989.716488]  blkdev_get+0x1ce/0x340
[  989.716490]  ? _raw_spin_unlock+0x27/0x40
[  989.716492]  blkdev_open+0x5b/0x70
[  989.716501]  do_dentry_open+0x213/0x310
[  989.716505]  ? blkdev_get_by_dev+0x50/0x50
[  989.716507]  vfs_open+0x4f/0x80
[  989.716518]  ? may_open+0x9b/0x100
[  989.716521]  path_openat+0x48a/0xdc0
[  989.716527]  ? _crng_backtrack_protect+0x30/0x80
[  989.716530]  do_filp_open+0x7e/0xd0
[  989.716533]  ? _raw_spin_unlock+0x27/0x40
[  989.716537]  ? __alloc_fd+0xf7/0x210
[  989.716539]  do_sys_open+0x115/0x1f0
[  989.716542]  SyS_open+0x1e/0x20
[  989.716546]  entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Hannes Reinecke <hare@suse.com>

--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Bart Van Assche Jan. 31, 2017, 12:31 a.m. UTC | #1
On Wed, 2017-01-18 at 10:48 +0100, Hannes Reinecke wrote:
> @@ -1488,26 +1487,13 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
>  void disk_block_events(struct gendisk *disk)
>  {
>         struct disk_events *ev = disk->ev;
> -       unsigned long flags;
> -       bool cancel;
>  
>         if (!ev)
>                 return;
>  
> -       /*
> -        * Outer mutex ensures that the first blocker completes canceling
> -        * the event work before further blockers are allowed to finish.
> -        */
> -       mutex_lock(&ev->block_mutex);
> -
> -       spin_lock_irqsave(&ev->lock, flags);
> -       cancel = !ev->block++;
> -       spin_unlock_irqrestore(&ev->lock, flags);
> -
> -       if (cancel)
> +       if (atomic_inc_return(&ev->block) == 1)
>                 cancel_delayed_work_sync(&disk->ev->dwork);
>  
> -       mutex_unlock(&ev->block_mutex);
>  }

Hello Hannes,

I have already encountered a deadlock caused by the event checking code a
few times, so I agree with you that it would be a big step forward if such
deadlocks could no longer occur. However, this patch makes a change that
is not described in the patch description, namely that disk_block_events()
calls are no longer serialized. Are you sure it is safe to drop the
serialization of disk_block_events() calls?
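
To make the concern concrete, a simplified before/after sketch (the
spinlock and the int vs. atomic_t counter difference are elided):

/* Before: ev->block_mutex serializes blockers, so a second caller only
 * returns after the first caller's cancel has completed. */
void block_events_old(struct disk_events *ev)
{
	mutex_lock(&ev->block_mutex);
	if (!ev->block++)
		cancel_delayed_work_sync(&ev->dwork);
	mutex_unlock(&ev->block_mutex);
}

/* After: only the first caller cancels; a concurrent second caller sees a
 * count > 1 and returns immediately, even though the event work may still
 * be running at that point. */
void block_events_new(struct disk_events *ev)
{
	if (atomic_inc_return(&ev->block) == 1)
		cancel_delayed_work_sync(&ev->dwork);
}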

Thanks,

Bart.
Hannes Reinecke Jan. 31, 2017, 4:15 p.m. UTC | #2
On 01/31/2017 01:31 AM, Bart Van Assche wrote:
> On Wed, 2017-01-18 at 10:48 +0100, Hannes Reinecke wrote:
>> @@ -1488,26 +1487,13 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
>>  void disk_block_events(struct gendisk *disk)
>>  {
>>         struct disk_events *ev = disk->ev;
>> -       unsigned long flags;
>> -       bool cancel;
>>  
>>         if (!ev)
>>                 return;
>>  
>> -       /*
>> -        * Outer mutex ensures that the first blocker completes canceling
>> -        * the event work before further blockers are allowed to finish.
>> -        */
>> -       mutex_lock(&ev->block_mutex);
>> -
>> -       spin_lock_irqsave(&ev->lock, flags);
>> -       cancel = !ev->block++;
>> -       spin_unlock_irqrestore(&ev->lock, flags);
>> -
>> -       if (cancel)
>> +       if (atomic_inc_return(&ev->block) == 1)
>>                 cancel_delayed_work_sync(&disk->ev->dwork);
>>  
>> -       mutex_unlock(&ev->block_mutex);
>>  }
> 
> Hello Hannes,
> 
> I have already encountered a deadlock caused by the event checking code a
> few times, so I agree with you that it would be a big step forward if such
> deadlocks could no longer occur. However, this patch makes a change that
> is not described in the patch description, namely that disk_block_events()
> calls are no longer serialized. Are you sure it is safe to drop the
> serialization of disk_block_events() calls?
> 
Well, this whole synchronization stuff is a bit weird; I so totally fail
to see the rationale for it.
But anyway, once we've converted ev->block to atomics I _think_ the
mutex_lock can remain; will be checking.
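
A sketch of that combination (speculative, not a posted patch): the atomic
increment avoids taking ev->lock, while block_mutex still serializes the
cancel itself:

void disk_block_events_sketch(struct disk_events *ev)
{
	mutex_lock(&ev->block_mutex);
	if (atomic_inc_return(&ev->block) == 1)
		cancel_delayed_work_sync(&ev->dwork);
	mutex_unlock(&ev->block_mutex);
}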

Cheers,

Hannes
Dexuan Cui Feb. 3, 2017, 12:22 p.m. UTC | #3
> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> owner@vger.kernel.org] On Behalf Of Hannes Reinecke
> Sent: Wednesday, February 1, 2017 00:15
> To: Bart Van Assche <Bart.VanAssche@sandisk.com>; hare@suse.de;
> axboe@kernel.dk
> Cc: hch@lst.de; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org;
> jth@kernel.org
> Subject: Re: [PATCH] genhd: Do not hold event lock when scheduling workqueue
> elements
> 
> On 01/31/2017 01:31 AM, Bart Van Assche wrote:
> > On Wed, 2017-01-18 at 10:48 +0100, Hannes Reinecke wrote:
> >> @@ -1488,26 +1487,13 @@ static unsigned long
> disk_events_poll_jiffies(struct gendisk *disk)
> >>  void disk_block_events(struct gendisk *disk)
> >>  {
> >>         struct disk_events *ev = disk->ev;
> >> -       unsigned long flags;
> >> -       bool cancel;
> >>
> >>         if (!ev)
> >>                 return;
> >>
> >> -       /*
> >> -        * Outer mutex ensures that the first blocker completes canceling
> >> -        * the event work before further blockers are allowed to finish.
> >> -        */
> >> -       mutex_lock(&ev->block_mutex);
> >> -
> >> -       spin_lock_irqsave(&ev->lock, flags);
> >> -       cancel = !ev->block++;
> >> -       spin_unlock_irqrestore(&ev->lock, flags);
> >> -
> >> -       if (cancel)
> >> +       if (atomic_inc_return(&ev->block) == 1)
> >>                 cancel_delayed_work_sync(&disk->ev->dwork);
> >>
> >> -       mutex_unlock(&ev->block_mutex);
> >>  }
> >
> > Hello Hannes,
> >
> > I have already encountered a deadlock caused by the event checking code a
> > few times, so I agree with you that it would be a big step forward if such
> > deadlocks could no longer occur. However, this patch makes a change that
> > is not described in the patch description, namely that disk_block_events()
> > calls are no longer serialized. Are you sure it is safe to drop the
> > serialization of disk_block_events() calls?
> >
> Well, this whole synchronization stuff is a bit weird; I so totally fail
> to see the rationale for it.
> But anyway, once we've converted ev->block to atomics I _think_ the
> mutex_lock can remain; will be checking.
> 
> Cheers,
> 
> Hannes
> --

Hi, I think I got the same calltrace with today's linux-next (next-20170203).

The issue happened every time my Linux virtual machine booted, and
Hannes's patch did NOT help.

The calltrace is pasted below.

Thanks,
-- Dexuan

[    9.718802] scsi host2: storvsc_host_t
[    9.723854] scsi 2:0:0:0: Direct-Access     Msft     Virtual Disk     1.0  PQ: 0 ANSI: 5
[    9.753161] sd 2:0:0:0: Attached scsi generic sg1 type 0
[    9.766383] scsi host3: storvsc_host_t
[    9.771759] scsi 3:0:0:0: Direct-Access     Msft     Virtual Disk     1.0  PQ: 0 ANSI: 5
[    9.781836] hv_utils: VSS IC version 5.0
[    9.822511] sd 3:0:0:0: Attached scsi generic sg2 type 0
[    9.829039] sd 3:0:0:0: [sdb] 266338304 512-byte logical blocks: (136 GB/127 GiB)
[    9.838525] sd 3:0:0:0: [sdb] 4096-byte physical blocks
[    9.845350] sd 3:0:0:0: [sdb] Write Protect is off
[    9.851077] sd 3:0:0:0: [sdb] Mode Sense: 0f 00 00 00
[    9.859765] sd 3:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[    9.872728]  sdb: sdb1
[    9.877279] sd 3:0:0:0: [sdb] Attached SCSI disk
[    9.964093] psmouse serio1: trackpoint: failed to get extended button data
[   14.864110] psmouse serio1: trackpoint: IBM TrackPoint firmware: 0x01, buttons: 0/0
[   14.876423] input: TPPS/2 IBM TrackPoint as /devices/platform/i8042/serio1/input/input3
[   14.887216] input: AT Translated Set 2 keyboard as /devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A03:00/device:07/VMBUS:01/d34b2567-b9b6-42b9-8778-0a4ec0b955bf/serio2/input/input5
[   44.644061] random: crng init done
[   66.524169] hv_utils: KVP IC version 4.0
Begin: Loading essential drivers ... done.
Begin: Running /scripts/init-premount ... done.
Begin: Mounting root file system ... Begin: Running /scripts/local-top ... done.
Begin: Running /scripts/local-premount ... done.
Begin: Waiting for root file system ... Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
[  242.652127] INFO: task systemd-udevd:183 blocked for more than 120 seconds.
[  242.661008]       Not tainted 4.10.0-rc6-next-20170203+ #2
[  242.697270] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  242.707872] systemd-udevd   D    0   183    170 0x00000004
[  242.714654] Call Trace:
[  242.717904]  __schedule+0x27d/0x8d0
[  242.724618]  schedule+0x36/0x80
[  242.729042]  schedule_timeout+0x235/0x3f0
[  242.736667]  ? sched_clock+0x9/0x10
[  242.741144]  ? try_to_wake_up+0x4a/0x460
[  242.745580]  wait_for_completion+0xa5/0x120
[  242.751417]  ? wake_up_q+0x70/0x70
[  242.759394]  flush_work+0x11a/0x1c0
[  242.764846]  ? worker_detach_from_pool+0xb0/0xb0
[  242.770943]  __cancel_work_timer+0xf3/0x1b0
[  242.776073]  ? disk_map_sector_rcu+0x70/0x70
[  242.783857]  cancel_delayed_work_sync+0x13/0x20
[  242.789438]  disk_block_events+0x34/0x40
[  242.794641]  __blkdev_get+0x10c/0x470
[  242.799227]  blkdev_get+0x11a/0x320
[  242.803161]  ? unlock_new_inode+0x49/0x80
[  242.807451]  ? bdget+0x110/0x130
[  242.811415]  blkdev_open+0x5b/0x70
[  242.820215]  do_dentry_open+0x208/0x310
[  242.825336]  ? blkdev_get_by_dev+0x50/0x50
[  242.836362]  vfs_open+0x4c/0x70
[  242.839839]  ? may_open+0x9b/0x100
[  242.843803]  path_openat+0x297/0x13e0
[  242.848410]  ? _copy_to_user+0x2e/0x40
[  242.852966]  ? move_addr_to_user+0xa3/0xc0
[  242.857961]  do_filp_open+0x7e/0xe0
[  242.862154]  ? _cond_resched+0x1a/0x50
[  242.867045]  ? kmem_cache_alloc+0x156/0x1b0
[  242.872004]  ? getname_flags+0x56/0x1f0
[  242.881609]  ? __alloc_fd+0x46/0x170
[  242.891812]  do_sys_open+0x11b/0x1f0
[  242.896506]  SyS_open+0x1e/0x20
[  242.900365]  entry_SYSCALL_64_fastpath+0x1e/0xad
[  242.905607] RIP: 0033:0x7efd3827aca0
[  242.909773] RSP: 002b:00007ffd7ceaeb38 EFLAGS: 00000246 ORIG_RAX: 0000000000000002
[  242.918778] RAX: ffffffffffffffda RBX: 0000555fdf1a2d40 RCX: 00007efd3827aca0
[  242.925725] RDX: 0000555fde106058 RSI: 00000000000a0800 RDI: 0000555fdf1a2ff0
[  242.929715] RBP: 0000000000000000 R08: 0000000000000073 R09: 0000000000000003
[  242.933921] R10: 00007efd37fe6520 R11: 0000000000000246 R12: 0000000000000000
[  242.942260] R13: 0000555fdf1a2d40 R14: 00007ffd7ceaea00 R15: 0000000000000000
[  242.952780] INFO: task systemd-udevd:191 blocked for more than 120 seconds.
[  242.960994]       Not tainted 4.10.0-rc6-next-20170203+ #2
[  242.966936] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  242.975395] systemd-udevd   D    0   191    170 0x00000004
[  242.983460] Call Trace:
[  242.986694]  __schedule+0x27d/0x8d0
[  242.992603]  ? mutex_lock+0x12/0x40
[  242.996467]  ? kprobes_module_callback+0x15b/0x1d0
[  243.001934]  schedule+0x36/0x80
[  243.005908]  async_synchronize_cookie_domain+0x91/0x130
[  243.012331]  ? wake_atomic_t_function+0x60/0x60
[  243.017690]  async_synchronize_full+0x17/0x20
[  243.022662]  do_init_module+0xc1/0x1ff
[  243.026965]  load_module+0x2313/0x29a0
[  243.031232]  ? __symbol_put+0x40/0x40
[  243.035347]  ? security_kernel_post_read_file+0x6b/0x80
[  243.042541]  SYSC_finit_module+0xbc/0xf0
[  243.045130]  SyS_finit_module+0xe/0x10
[  243.049116]  entry_SYSCALL_64_fastpath+0x1e/0xad
[  243.054517] RIP: 0033:0x7efd37fa1c19
[  243.058744] RSP: 002b:00007ffd7ceae188 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[  243.067483] RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007efd37fa1c19
[  243.075801] RDX: 0000000000000000 RSI: 00007efd38497e2a RDI: 000000000000000c
[  243.084162] RBP: 00007ffd7cead190 R08: 0000000000000000 R09: 0000000000000000
[  243.093738] R10: 000000000000000c R11: 0000000000000246 R12: 0000555fdf1a3300
[  243.103158] R13: 00007ffd7cead170 R14: 0000000000000005 R15: 000000000aba9500
[The same two hung-task reports for systemd-udevd:183 and systemd-udevd:191
repeat at 363 and 484 seconds; the duplicate traces are omitted here.]
Dexuan Cui Feb. 7, 2017, 2:23 a.m. UTC | #4
> From: linux-block-owner@vger.kernel.org [mailto:linux-block-
> owner@vger.kernel.org] On Behalf Of Dexuan Cui
> Sent: Friday, February 3, 2017 20:23
> To: Hannes Reinecke <hare@suse.com>; Bart Van Assche
> <Bart.VanAssche@sandisk.com>; hare@suse.de; axboe@kernel.dk
> Cc: hch@lst.de; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org;
> jth@kernel.org
> Subject: RE: [PATCH] genhd: Do not hold event lock when scheduling workqueue
> elements
> 
> > From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> > owner@vger.kernel.org] On Behalf Of Hannes Reinecke
> > Sent: Wednesday, February 1, 2017 00:15
> > To: Bart Van Assche <Bart.VanAssche@sandisk.com>; hare@suse.de;
> > axboe@kernel.dk
> > Cc: hch@lst.de; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org;
> > jth@kernel.org
> > Subject: Re: [PATCH] genhd: Do not hold event lock when scheduling
> workqueue
> > elements
> >
> > On 01/31/2017 01:31 AM, Bart Van Assche wrote:
> > > On Wed, 2017-01-18 at 10:48 +0100, Hannes Reinecke wrote:
> > >> @@ -1488,26 +1487,13 @@ static unsigned long
> > disk_events_poll_jiffies(struct gendisk *disk)
> > >>  void disk_block_events(struct gendisk *disk)
> > >>  {
> > >>         struct disk_events *ev = disk->ev;
> > >> -       unsigned long flags;
> > >> -       bool cancel;
> > >>
> > >>         if (!ev)
> > >>                 return;
> > >>
> > >> -       /*
> > >> -        * Outer mutex ensures that the first blocker completes canceling
> > >> -        * the event work before further blockers are allowed to finish.
> > >> -        */
> > >> -       mutex_lock(&ev->block_mutex);
> > >> -
> > >> -       spin_lock_irqsave(&ev->lock, flags);
> > >> -       cancel = !ev->block++;
> > >> -       spin_unlock_irqrestore(&ev->lock, flags);
> > >> -
> > >> -       if (cancel)
> > >> +       if (atomic_inc_return(&ev->block) == 1)
> > >>                 cancel_delayed_work_sync(&disk->ev->dwork);
> > >>
> > >> -       mutex_unlock(&ev->block_mutex);
> > >>  }
> > >
> > > Hello Hannes,
> > >
> > > I have already encountered a deadlock caused by the event checking code a
> > > few times, so I agree with you that it would be a big step forward if such
> > > deadlocks could no longer occur. However, this patch makes a change that
> > > is not described in the patch description, namely that disk_block_events()
> > > calls are no longer serialized. Are you sure it is safe to drop the
> > > serialization of disk_block_events() calls?
> > >
> > Well, this whole synchronization stuff is a bit weird; I so totally fail
> > to see the rationale for it.
> > But anyway, once we've converted ev->block to atomics I _think_ the
> > mutex_lock can remain; will be checking.
> >
> > Cheers,
> >
> > Hannes
> > --
> 
> Hi, I think I got the same calltrace with today's linux-next (next-20170203).
> 
> The issue happened every time my Linux virtual machine booted, and
> Hannes's patch did NOT help.
> 
> The calltrace is pasted below.
> 
> -- Dexuan
 
Any news on this thread?

The issue is still blocking Linux from booting up normally in my test. :-(

Have we identified the faulty patch?
If so, at least I can try to revert it to boot up.

Thanks,
-- Dexuan
Bart Van Assche Feb. 7, 2017, 2:56 a.m. UTC | #5
On Tue, 2017-02-07 at 02:23 +0000, Dexuan Cui wrote:
> Any news on this thread?
> 
> The issue is still blocking Linux from booting up normally in my test. :-(
> 
> Have we identified the faulty patch?
> If so, at least I can try to revert it to boot up.

It's interesting that you have a reproducible testcase. If you can tell me how to
reproduce this I'll have a look at it together with Hannes.

Bart.
Dexuan Cui Feb. 7, 2017, 3:48 a.m. UTC | #6
> From: Bart Van Assche [mailto:Bart.VanAssche@sandisk.com]
>
> On Tue, 2017-02-07 at 02:23 +0000, Dexuan Cui wrote:
> > Any news on this thread?
> >
> > The issue is still blocking Linux from booting up normally in my test. :-(
> >
> > Have we identified the faulty patch?
> > If so, at least I can try to revert it to boot up.
>
> It's interesting that you have a reproducible testcase. If you can tell me how to
> reproduce this I'll have a look at it together with Hannes.
>
> Bart.

I'm running an Ubuntu 16.04 guest on Hyper-V with the guest kernel replaced
with the linux-next kernel.

I can boot the guest with linux-next's next-20170130 without any issue,
but since next-20170131 I haven't succeeded in booting the guest.

With next-20170203 (mentioned in my mail last Friday), I got the same
calltrace as Hannes.

With today's linux-next (next-20170206), actually the calltrace changed to
the below:

(Please see the attached files for the kernel config and the full kernel log.)
(I applied Hannes's patch in this thread, but the situation remained the same.)

[  121.824158] INFO: task systemd-udevd:91 blocked for more than 60 seconds.
[  121.854885]       Not tainted 4.10.0-rc6-next-20170206+ #1
[  121.885004] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  121.927618] systemd-udevd   D12816    91     86 0x00000000
[  121.952912] Call Trace:
[  121.964366]  __schedule+0x2a9/0x900
[  121.979931]  schedule+0x36/0x80
[  121.995288]  async_synchronize_cookie_domain+0x91/0x130
[  122.023036]  ? remove_wait_queue+0x70/0x70
[  122.051383]  async_synchronize_full+0x17/0x20
[  122.076925]  do_init_module+0xc1/0x1f9
[  122.097530]  load_module+0x24bc/0x2980
[  122.118418]  ? ref_module+0x1c0/0x1c0
[  122.139060]  SYSC_finit_module+0xbc/0xf0
[  122.161566]  SyS_finit_module+0xe/0x10
[  122.185397]  entry_SYSCALL_64_fastpath+0x1e/0xb2
[  122.221880] RIP: 0033:0x7f1d69105c19
[  122.248526] RSP: 002b:00007ffe34dc3928 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[  122.283349] RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007f1d69105c19
[  122.315987] RDX: 0000000000000000 RSI: 00007f1d695fbe2a RDI: 000000000000000c
[  122.354369] RBP: 00007ffe34dc2930 R08: 0000000000000000 R09: 0000000000000000
[  122.407496] R10: 000000000000000c R11: 0000000000000246 R12: 000055f0b9b910a0
[  122.443667] R13: 00007ffe34dc2910 R14: 0000000000000005 R15: 000000000aba9500
[  122.475741]
[  122.475741] Showing all locks held in the system:
[  122.503742] 2 locks held by khungtaskd/17:
[  122.524260]  #0:  (rcu_read_lock){......}, at: [<ffffffff9a10d5f1>] watchdog+0xa1/0x3d0
[  122.569110]  #1:  (tasklist_lock){......}, at: [<ffffffff9a0aaf8d>] debug_show_all_locks+0x3d/0x1a0
[  122.623903] 2 locks held by kworker/u128:1/61:
[  122.654030]  #0:  ("events_unbound"){......}, at: [<ffffffff9a079035>] process_one_work+0x175/0x540
[  122.710469]  #1:  ((&entry->work)){......}, at: [<ffffffff9a079035>] process_one_work+0x175/0x540
[  122.770659]

Thanks,
-- Dexuan
Dexuan Cui Feb. 7, 2017, 6:29 a.m. UTC | #7
> From: linux-block-owner@vger.kernel.org [mailto:linux-block-
> owner@vger.kernel.org] On Behalf Of Dexuan Cui
> with the linux-next kernel.
> 
> I can boot the guest with linux-next's next-20170130 without any issue,
> but since next-20170131 I haven't succeeded in booting the guest.
> 
> With next-20170203 (mentioned in my mail last Friday), I got the same
> calltrace as Hannes.
> 
> With today's linux-next (next-20170206), actually the calltrace changed to
> the below.
> [  122.023036]  ? remove_wait_queue+0x70/0x70
> [  122.051383]  async_synchronize_full+0x17/0x20
> [  122.076925]  do_init_module+0xc1/0x1f9
> [  122.097530]  load_module+0x24bc/0x2980

I don't know why it hangs here, but this is the same calltrace in my
last-Friday mail, which contains 2 calltraces. It looks like the other calltrace has
been resolved by some changes between next-20170203 and today.

Here the kernel is trying to load the Hyper-V storage driver (hv_storvsc), and
the driver's __init and .probe have finished successfully and then the kernel
hangs here.
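
As the trace shows, the hang is in do_init_module() calling
async_synchronize_full(), which waits for *all* outstanding async work
system-wide. A minimal sketch of how one stuck async item (e.g. an
asynchronous probe) can stall every later module load; the names here are
hypothetical:

#include <linux/async.h>
#include <linux/delay.h>
#include <linux/module.h>

/* hypothetical async work item that never completes */
static void stuck_probe(void *data, async_cookie_t cookie)
{
	for (;;)
		msleep(1000);	/* e.g. waiting for I/O that never finishes */
}

static int __init demo_init(void)
{
	async_schedule(stuck_probe, NULL);
	return 0;	/* init "succeeds", but the async work lingers */
}
module_init(demo_init);
MODULE_LICENSE("GPL");

/* Any module loaded afterwards can now hang in do_init_module() ->
 * async_synchronize_full(), waiting for stuck_probe() to finish. */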

I believe something broke recently, because I didn't have any issue before
Jan 31.

Thanks,
-- Dexuan
Jens Axboe Feb. 7, 2017, 4:09 p.m. UTC | #8
On 02/06/2017 11:29 PM, Dexuan Cui wrote:
>> From: linux-block-owner@vger.kernel.org [mailto:linux-block-
>> owner@vger.kernel.org] On Behalf Of Dexuan Cui
>> with the linux-next kernel.
>>
>> I can boot the guest with linux-next's next-20170130 without any issue,
>> but since next-20170131 I haven't succeeded in booting the guest.
>>
>> With next-20170203 (mentioned in my mail last Friday), I got the same
>> calltrace as Hannes.
>>
>> With today's linux-next (next-20170206), actually the calltrace changed to
>> the below.
>> [  122.023036]  ? remove_wait_queue+0x70/0x70
>> [  122.051383]  async_synchronize_full+0x17/0x20
>> [  122.076925]  do_init_module+0xc1/0x1f9
>> [  122.097530]  load_module+0x24bc/0x2980
>  
> I don't know why it hangs here, but this is the same calltrace in my
> last-Friday mail, which contains 2 calltraces. It looks like the other calltrace has
> been resolved by some changes between next-20170203 and today.
> 
> Here the kernel is trying to load the Hyper-V storage driver (hv_storvsc), and
> the driver's __init and .probe have finished successfully and then the kernel
> hangs here.
> 
> I believe something broke recently, because I didn't have any issue before
> Jan 31.

Can you try and bisect it?
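
(For reference, a sketch of the usual procedure, using the good/bad boundary
reported earlier in the thread; exact refs may need adjusting:)

git bisect start
git bisect bad                    # the current, non-booting tree
git bisect good next-20170130     # the last tree known to boot
# build and boot-test the commit git checks out, then mark it:
git bisect good                   # ... or: git bisect bad, and repeat
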
Dexuan Cui Feb. 8, 2017, 10:48 a.m. UTC | #9
> From: Jens Axboe [mailto:axboe@kernel.dk]
> Sent: Wednesday, February 8, 2017 00:09
> To: Dexuan Cui <decui@microsoft.com>; Bart Van Assche
> <Bart.VanAssche@sandisk.com>; hare@suse.com; hare@suse.de
> Cc: hch@lst.de; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org;
> jth@kernel.org
> Subject: Re: [PATCH] genhd: Do not hold event lock when scheduling workqueue
> elements
> 
> On 02/06/2017 11:29 PM, Dexuan Cui wrote:
> >> From: linux-block-owner@vger.kernel.org [mailto:linux-block-
> >> owner@vger.kernel.org] On Behalf Of Dexuan Cui
> >> with the linux-next kernel.
> >>
> >> I can boot the guest with linux-next's next-20170130 without any issue,
> >> but since next-20170131 I haven't succeeded in booting the guest.
> >>
> >> With next-20170203 (mentioned in my mail last Friday), I got the same
> >> calltrace as Hannes.
> >>
> >> With today's linux-next (next-20170206), actually the calltrace changed to
> >> the below.
> >> [  122.023036]  ? remove_wait_queue+0x70/0x70
> >> [  122.051383]  async_synchronize_full+0x17/0x20
> >> [  122.076925]  do_init_module+0xc1/0x1f9
> >> [  122.097530]  load_module+0x24bc/0x2980
> >
> > I don't know why it hangs here, but this is the same calltrace in my
> > last-Friday mail, which contains 2 calltraces. It looks like the other calltrace has
> > been resolved by some changes between next-20170203 and today.
> >
> > Here the kernel is trying to load the Hyper-V storage driver (hv_storvsc), and
> > the driver's __init and .probe have finished successfully and then the kernel
> > hangs here.
> >
> > I believe something broke recently, because I didn't have any issue before
> > Jan 31.
> 
> Can you try and bisect it?
> 
> Jens Axboe


I bisected it on the branch for-4.11/next of the linux-block repo and the log shows
the first bad commit is 
[e9c787e6] scsi: allocate scsi_cmnd structures as part of struct request

# git bisect log
git bisect start
# bad: [80c6b15732f0d8830032149cbcbc8d67e074b5e8] blk-mq-sched: (un)register elevator when (un)registering queue
git bisect bad 80c6b15732f0d8830032149cbcbc8d67e074b5e8
# good: [309bd96af9e26da3038661bf5cdad780eef49dd9] md: cleanup bio op / flags handling in raid1_write_request
git bisect good 309bd96af9e26da3038661bf5cdad780eef49dd9
# bad: [27410a8927fb89bd150de08d749a8ed7f67b7739] nbd: remove REQ_TYPE_DRV_PRIV leftovers
git bisect bad 27410a8927fb89bd150de08d749a8ed7f67b7739
# bad: [e9c787e65c0c36529745be47d490d998b4b6e589] scsi: allocate scsi_cmnd structures as part of struct request
git bisect bad e9c787e65c0c36529745be47d490d998b4b6e589
# good: [3278255741326b6d66d8ca7d1cb2c57633ee43d9] scsi_dh_rdac: switch to scsi_execute_req_flags()
git bisect good 3278255741326b6d66d8ca7d1cb2c57633ee43d9
# good: [0fbc3e0ff623f1012e7c2af96e781eeb26bcc0d7] scsi: remove gfp_flags member in scsi_host_cmd_pool
git bisect good 0fbc3e0ff623f1012e7c2af96e781eeb26bcc0d7
# good: [eeff68c5618c8d0920b14533c70b2df007bd94b4] scsi: remove scsi_cmd_dma_pool
git bisect good eeff68c5618c8d0920b14533c70b2df007bd94b4
# good: [d48777a633d6fa7ccde0f0e6509f0c01fbfc5299] scsi: remove __scsi_alloc_queue
git bisect good d48777a633d6fa7ccde0f0e6509f0c01fbfc5299
# first bad commit: [e9c787e65c0c36529745be47d490d998b4b6e589] scsi: allocate scsi_cmnd structures as part of struct request

Thanks,
-- Dexuan
Jens Axboe Feb. 8, 2017, 5:43 p.m. UTC | #10
On 02/08/2017 03:48 AM, Dexuan Cui wrote:
>> From: Jens Axboe [mailto:axboe@kernel.dk]
>> Sent: Wednesday, February 8, 2017 00:09
>> To: Dexuan Cui <decui@microsoft.com>; Bart Van Assche
>> <Bart.VanAssche@sandisk.com>; hare@suse.com; hare@suse.de
>> Cc: hch@lst.de; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org;
>> jth@kernel.org
>> Subject: Re: [PATCH] genhd: Do not hold event lock when scheduling workqueue
>> elements
>>
>> On 02/06/2017 11:29 PM, Dexuan Cui wrote:
>>>> From: linux-block-owner@vger.kernel.org [mailto:linux-block-
>>>> owner@vger.kernel.org] On Behalf Of Dexuan Cui
>>>> with the linux-next kernel.
>>>>
>>>> I can boot the guest with linux-next's next-20170130 without any issue,
>>>> but since next-20170131 I haven't succeeded in booting the guest.
>>>>
>>>> With next-20170203 (mentioned in my mail last Friday), I got the same
>>>> calltrace as Hannes.
>>>>
>>>> With today's linux-next (next-20170206), actually the calltrace changed to
>>>> the below.
>>>> [  122.023036]  ? remove_wait_queue+0x70/0x70
>>>> [  122.051383]  async_synchronize_full+0x17/0x20
>>>> [  122.076925]  do_init_module+0xc1/0x1f9
>>>> [  122.097530]  load_module+0x24bc/0x2980
>>>
>>> I don't know why it hangs here, but this is the same calltrace in my
>>> last-Friday mail, which contains 2 calltraces. It looks like the other calltrace has
>>> been resolved by some changes between next-20170203 and today.
>>>
>>> Here the kernel is trying to load the Hyper-V storage driver (hv_storvsc), and
>>> the driver's __init and .probe have finished successfully and then the kernel
>>> hangs here.
>>>
>>> I believe something broke recently, because I didn't have any issue before
>>> Jan 31.
>>
>> Can you try and bisect it?
>>
>> Jens Axboe
> 
> I bisected it on the branch for-4.11/next of the linux-block repo and the log shows
> the first bad commit is 
> [e9c787e6] scsi: allocate scsi_cmnd structures as part of struct request
> 
> # git bisect log
> git bisect start
> # bad: [80c6b15732f0d8830032149cbcbc8d67e074b5e8] blk-mq-sched: (un)register elevator when (un)registering queue
> git bisect bad 80c6b15732f0d8830032149cbcbc8d67e074b5e8
> # good: [309bd96af9e26da3038661bf5cdad780eef49dd9] md: cleanup bio op / flags handling in raid1_write_request
> git bisect good 309bd96af9e26da3038661bf5cdad780eef49dd9
> # bad: [27410a8927fb89bd150de08d749a8ed7f67b7739] nbd: remove REQ_TYPE_DRV_PRIV leftovers
> git bisect bad 27410a8927fb89bd150de08d749a8ed7f67b7739
> # bad: [e9c787e65c0c36529745be47d490d998b4b6e589] scsi: allocate scsi_cmnd structures as part of struct request
> git bisect bad e9c787e65c0c36529745be47d490d998b4b6e589
> # good: [3278255741326b6d66d8ca7d1cb2c57633ee43d9] scsi_dh_rdac: switch to scsi_execute_req_flags()
> git bisect good 3278255741326b6d66d8ca7d1cb2c57633ee43d9
> # good: [0fbc3e0ff623f1012e7c2af96e781eeb26bcc0d7] scsi: remove gfp_flags member in scsi_host_cmd_pool
> git bisect good 0fbc3e0ff623f1012e7c2af96e781eeb26bcc0d7
> # good: [eeff68c5618c8d0920b14533c70b2df007bd94b4] scsi: remove scsi_cmd_dma_pool
> git bisect good eeff68c5618c8d0920b14533c70b2df007bd94b4
> # good: [d48777a633d6fa7ccde0f0e6509f0c01fbfc5299] scsi: remove __scsi_alloc_queue
> git bisect good d48777a633d6fa7ccde0f0e6509f0c01fbfc5299
> # first bad commit: [e9c787e65c0c36529745be47d490d998b4b6e589] scsi: allocate scsi_cmnd structures as part of struct request

Christoph?

I've changed the subject line; this issue has nothing to do with the
issue that Hannes was attempting to fix.
Christoph Hellwig Feb. 8, 2017, 6:03 p.m. UTC | #11
On Wed, Feb 08, 2017 at 10:43:59AM -0700, Jens Axboe wrote:
> I've changed the subject line; this issue has nothing to do with the
> issue that Hannes was attempting to fix.

Nothing really useful in the thread.  Dexuan, can you throw in some
prints to see which command times out?
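
(A sketch of that kind of debug print; the helper and its placement are
assumptions for illustration, not something posted in this thread:)

#include <linux/kernel.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>

/* hypothetical debug aid: call from a timeout or dispatch path to see
 * which command is stuck */
static void debug_dump_scsi_cmd(struct scsi_cmnd *scmd)
{
	scmd_printk(KERN_WARNING, scmd, "opcode 0x%02x timed out\n",
		    scmd->cmnd[0]);
}
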
Dexuan Cui Feb. 9, 2017, 7:35 a.m. UTC | #12
> From: hch@lst.de [mailto:hch@lst.de]
> Sent: Thursday, February 9, 2017 02:03
> To: Jens Axboe <axboe@kernel.dk>
> Cc: Dexuan Cui <decui@microsoft.com>; Bart Van Assche
> <Bart.VanAssche@sandisk.com>; hare@suse.com; hare@suse.de; Martin K.
> Petersen <martin.petersen@oracle.com>; hch@lst.de; linux-
> kernel@vger.kernel.org; linux-block@vger.kernel.org; jth@kernel.org
> Subject: Re: Boot regression (was "Re: [PATCH] genhd: Do not hold event lock
> when scheduling workqueue elements")
> 
> On Wed, Feb 08, 2017 at 10:43:59AM -0700, Jens Axboe wrote:
> > I've changed the subject line, this issue has nothing to do with the
> > issue that Hannes was attempting to fix.
> 
> Nothing really useful in the thread.  Dexuan, can you throw in some
> prints to see which command times out?

My colleagues have sent you the logs in another thread.

Thanks for looking into this!

-- Dexuan
Christoph Hellwig Feb. 9, 2017, 1:08 p.m. UTC | #13
Hi Dexuan,

I've spent some time with the logs and looking over the code and
couldn't find any smoking gun.  I'm starting to wonder if it might just
be a timing issue.

Can you try one or two things for me:

 1) run with the blk-mq I/O path for scsi by either enabling it at boot /
    module load time with the scsi_mod.use_blk_mq=Y option, or at compile
    time by enabling the CONFIG_SCSI_MQ_DEFAULT option (see the snippet
    after this list).  If that fails with the commit, a blk-mq run before
    the commit would also be useful.
 2) if possible, run a VM config without the virtual CD-ROM drive -
    a lot of the scsi log chatter is about handling timeouts on the
    CD drive, so removing it might help isolate the issue.
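
(For reference, the standard ways to set option 1; worth double-checking
the option name against the tree being tested:)

# on the kernel command line:
scsi_mod.use_blk_mq=Y
# or, if scsi_mod is built as a module, at load time:
modprobe scsi_mod use_blk_mq=Y
# or at build time, in the kernel config:
CONFIG_SCSI_MQ_DEFAULT=y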

Note that I'll be offline from this afternoon European time until Sunday
night as I'm out in the mountains at a lodge without internet access,
but this issue will be my priority once back.

Patch

diff --git a/block/genhd.c b/block/genhd.c
index fcd6d4f..ae46caa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1426,8 +1426,7 @@  struct disk_events {
 	struct gendisk		*disk;		/* the associated disk */
 	spinlock_t		lock;
 
-	struct mutex		block_mutex;	/* protects blocking */
-	int			block;		/* event blocking depth */
+	atomic_t		block;		/* event blocking depth */
 	unsigned int		pending;	/* events already sent out */
 	unsigned int		clearing;	/* events being cleared */
 
@@ -1488,26 +1487,13 @@  static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
 void disk_block_events(struct gendisk *disk)
 {
 	struct disk_events *ev = disk->ev;
-	unsigned long flags;
-	bool cancel;
 
 	if (!ev)
 		return;
 
-	/*
-	 * Outer mutex ensures that the first blocker completes canceling
-	 * the event work before further blockers are allowed to finish.
-	 */
-	mutex_lock(&ev->block_mutex);
-
-	spin_lock_irqsave(&ev->lock, flags);
-	cancel = !ev->block++;
-	spin_unlock_irqrestore(&ev->lock, flags);
-
-	if (cancel)
+	if (atomic_inc_return(&ev->block) == 1)
 		cancel_delayed_work_sync(&disk->ev->dwork);
 
-	mutex_unlock(&ev->block_mutex);
 }
 
 static void __disk_unblock_events(struct gendisk *disk, bool check_now)
@@ -1516,23 +1502,18 @@  static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 	unsigned long intv;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ev->lock, flags);
-
-	if (WARN_ON_ONCE(ev->block <= 0))
-		goto out_unlock;
-
-	if (--ev->block)
-		goto out_unlock;
+	if (atomic_dec_return(&ev->block) > 0)
+		return;
 
+	spin_lock_irqsave(&ev->lock, flags);
 	intv = disk_events_poll_jiffies(disk);
+	spin_unlock_irqrestore(&ev->lock, flags);
 	if (check_now)
 		queue_delayed_work(system_freezable_power_efficient_wq,
 				&ev->dwork, 0);
 	else if (intv)
 		queue_delayed_work(system_freezable_power_efficient_wq,
 				&ev->dwork, intv);
-out_unlock:
-	spin_unlock_irqrestore(&ev->lock, flags);
 }
 
 /**
@@ -1572,10 +1553,10 @@  void disk_flush_events(struct gendisk *disk, unsigned int mask)
 
 	spin_lock_irq(&ev->lock);
 	ev->clearing |= mask;
-	if (!ev->block)
+	spin_unlock_irq(&ev->lock);
+	if (!atomic_read(&ev->block))
 		mod_delayed_work(system_freezable_power_efficient_wq,
 				&ev->dwork, 0);
-	spin_unlock_irq(&ev->lock);
 }
 
 /**
@@ -1666,12 +1647,11 @@  static void disk_check_events(struct disk_events *ev,
 	*clearing_ptr &= ~clearing;
 
 	intv = disk_events_poll_jiffies(disk);
-	if (!ev->block && intv)
+	spin_unlock_irq(&ev->lock);
+	if (!atomic_read(&ev->block) && intv)
 		queue_delayed_work(system_freezable_power_efficient_wq,
 				&ev->dwork, intv);
 
-	spin_unlock_irq(&ev->lock);
-
 	/*
 	 * Tell userland about new events.  Only the events listed in
 	 * @disk->events are reported.  Unlisted events are processed the
@@ -1824,8 +1804,7 @@  static void disk_alloc_events(struct gendisk *disk)
 	INIT_LIST_HEAD(&ev->node);
 	ev->disk = disk;
 	spin_lock_init(&ev->lock);
-	mutex_init(&ev->block_mutex);
-	ev->block = 1;
+	atomic_set(&ev->block, 1);
 	ev->poll_msecs = -1;
 	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
 
@@ -1870,6 +1849,6 @@  static void disk_del_events(struct gendisk *disk)
 static void disk_release_events(struct gendisk *disk)
 {
 	/* the block count should be 1 from disk_del_events() */
-	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
+	WARN_ON_ONCE(disk->ev && atomic_read(&disk->ev->block) != 1);
 	kfree(disk->ev);
 }