diff mbox series

[RFC] RDMA/srp: Fix use-after-free in srp_exit_cmd_priv

Message ID 20220624040253.1420844-1-lizhijian@fujitsu.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [RFC] RDMA/srp: Fix use-after-free in srp_exit_cmd_priv | expand

Commit Message

Zhijian Li (Fujitsu) June 24, 2022, 4:02 a.m. UTC
srp_exit_cmd_priv() will try to access srp_device by Scsi_Host like below:

 Scsi_Host                srp_target_port     srp_host         srp_device
+------------------+ +-- +--------------+  +>+----------+   +->+---------+
|                  | |   |              |  | |          |   |  |         |
|                  | |   |  *srp_host   +--+ | *srp_dev +---+  | *dev    |
+-+hostdata--------+-+   |              |    |          |      |         |
| | srp_target_port|     |              |    |          |      |         |
| |                |     |              |    |          |      |         |
| |                |     |              |    |          |      |         |
+-+----------------+---- +--------------+    +----------+      +---------+

But sometims Scsi_Host still keeps the reference to srp_host that is
possible released already. This could be happend if i frequently abort
(Ctrl-c) the blktests during it was running and then cause below error:

[  952.294952] scsi 9:0:0:1: alua: Detached
[  952.298232] ==================================================================
[  952.298235] BUG: KASAN: use-after-free in srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
[  952.298254] Read of size 8 at addr ffff888100337000 by task multipathd/16727
[  952.298258]
[  952.298263] CPU: 0 PID: 16727 Comm: multipathd Not tainted 5.19.0-rc1-roce-flush+ #78
[  952.298268] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-27-g64f37cc530f1-prebuilt.qemu.org 04/01/2014
[  952.298272] Call Trace:
[  952.298281]  <TASK>
[  952.298283]  dump_stack_lvl+0x34/0x44
[  952.298303]  print_report.cold+0x5e/0x5db
[  952.298317]  ? schedule_timeout+0x1c4/0x210
[  952.298329]  ? srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
[  952.298343]  kasan_report+0xab/0x120
[  952.298356]  ? srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
[  952.298370]  srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
[  952.298384]  scsi_mq_exit_request+0x4d/0x70
[  952.298394]  blk_mq_free_rqs+0x143/0x410
[  952.298406]  __blk_mq_free_map_and_rqs+0x6e/0x100
[  952.298413]  blk_mq_free_tag_set+0x2b/0x160
[  952.298419]  scsi_host_dev_release+0xf3/0x1a0
[  952.298427]  device_release+0x54/0xe0
[  952.298438]  kobject_put+0xa5/0x120
[  952.298451]  device_release+0x54/0xe0
[  952.298456]  kobject_put+0xa5/0x120
[  952.298461]  scsi_device_dev_release_usercontext+0x4c1/0x4e0
[  952.298468]  ? scsi_device_cls_release+0x10/0x10
[  952.298474]  execute_in_process_context+0x23/0x90
[  952.298483]  device_release+0x54/0xe0
[  952.298487]  kobject_put+0xa5/0x120
[  952.298493]  scsi_disk_release+0x3f/0x50
[  952.298500]  device_release+0x54/0xe0
[  952.298504]  kobject_put+0xa5/0x120
[  952.298509]  disk_release+0x17f/0x1b0
[  952.298516]  device_release+0x54/0xe0
[  952.298520]  kobject_put+0xa5/0x120
[  952.298526]  dm_put_table_device+0xa3/0x160 [dm_mod]
[  952.298559]  dm_put_device+0xd0/0x140 [dm_mod]
[  952.298592]  free_priority_group+0xd8/0x110 [dm_multipath]
[  952.298602]  free_multipath+0x94/0xe0 [dm_multipath]
[  952.298612]  dm_table_destroy+0xa2/0x1e0 [dm_mod]
[  952.298645]  __dm_destroy+0x196/0x350 [dm_mod]
[  952.298676]  dev_remove+0x10c/0x160 [dm_mod]
[  952.298709]  ctl_ioctl+0x2c2/0x590 [dm_mod]
[  952.298751]  ? table_clear+0x100/0x100 [dm_mod]
[  952.298784]  ? remove_all+0x40/0x40 [dm_mod]
[  952.298820]  ? __blkcg_punt_bio_submit+0xd0/0xd0
[  952.298828]  ? __rcu_read_unlock+0x43/0x60
[  952.298838]  dm_ctl_ioctl+0x5/0x10 [dm_mod]
[  952.298870]  __x64_sys_ioctl+0xb4/0xf0
[  952.298784]  ? remove_all+0x40/0x40 [dm_mod]
[  952.298820]  ? __blkcg_punt_bio_submit+0xd0/0xd0
[  952.298828]  ? __rcu_read_unlock+0x43/0x60
[  952.298838]  dm_ctl_ioctl+0x5/0x10 [dm_mod]
[  952.298870]  __x64_sys_ioctl+0xb4/0xf0
[  952.298877]  do_syscall_64+0x3b/0x90
[  952.298884]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
[  952.298890] RIP: 0033:0x7f1d7f4654eb
[  952.298896] Code: ff ff ff 85 c0 79 9b 49 c7 c4 ff ff ff ff 5b 5d 4c 89 e0 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 55 b9 0c 00 f7 d8 64 89 01 48
[  952.298903] RSP: 002b:00007f1d7de21558 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
[  952.298912] RAX: ffffffffffffffda RBX: 00007f1d7f682050 RCX: 00007f1d7f4654eb
[  952.298916] RDX: 00007f1d6c016730 RSI: 00000000c138fd04 RDI: 0000000000000005
[  952.298919] RBP: 00007f1d7f6be2b6 R08: 00007f1d7de1f2b0 R09: 00007f1d7de1f200
[  952.298922] R10: 0000000000000001 R11: 0000000000000206 R12: 00007f1d6c001100
[  952.298925] R13: 00007f1d6c016760 R14: 00007f1d6c0167e0 R15: 00007f1d6c016730
[  952.298930]  </TASK>
[  952.298933]
[  952.298934] Allocated by task 16887:
[  952.298939]  kasan_save_stack+0x1e/0x40
[  952.298944]  __kasan_kmalloc+0x81/0xa0
[  952.298948]  srp_add_one+0x32a/0x540 [ib_srp]
[  952.298961]  add_client_context+0x23b/0x300 [ib_core]
[  952.299043]  ib_register_client+0x1d5/0x220 [ib_core]
[  952.299123]  0xffffffffc0a60149
[  952.299125]  do_one_initcall+0x84/0x280
[  952.299132]  do_init_module+0xdf/0x2e0
[  952.299136]  load_module+0x2b9e/0x2c90
[  952.299139]  __do_sys_finit_module+0x111/0x190
[  952.299143]  do_syscall_64+0x3b/0x90
[  952.299147]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
[  952.299152]
[  952.299153] Freed by task 17289:
[  952.299156]  kasan_save_stack+0x1e/0x40
[  952.299160]  kasan_set_track+0x21/0x30
[  952.299164]  kasan_set_free_info+0x20/0x30
[  952.299169]  __kasan_slab_free+0x108/0x170
[  952.299173]  kfree+0x9a/0x320
[  952.299177]  srp_remove_one+0x114/0x180 [ib_srp]
[  952.299189]  remove_client_context+0x8f/0xd0 [ib_core]
[  952.299269]  disable_device+0xee/0x1e0 [ib_core]
[  952.299348]  __ib_unregister_device+0x59/0xf0 [ib_core]
[  952.299429]  ib_unregister_device_and_put+0x3b/0x50 [ib_core]
[  952.299509]  nldev_dellink+0x126/0x1b0 [ib_core]
[  952.299592]  rdma_nl_rcv_msg+0x1cc/0x310 [ib_core]
[  952.299673]  rdma_nl_rcv+0x172/0x200 [ib_core]
[  952.299760]  netlink_unicast+0x36b/0x4a0
[  952.299770]  netlink_sendmsg+0x3a9/0x6d0
[  952.299774]  sock_sendmsg+0x91/0xa0
[  952.299783]  __sys_sendto+0x16f/0x210
[  952.299788]  __x64_sys_sendto+0x6f/0x80
[  952.299792]  do_syscall_64+0x3b/0x90
[  952.299795]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
[  952.299801]
[  952.299801] Last potentially related work creation:
[  952.299803]  kasan_save_stack+0x1e/0x40
[  952.299807]  __kasan_record_aux_stack+0x97/0xa0
[  952.299812]  call_rcu+0x41/0x580
[  952.299816]  rht_deferred_worker+0x552/0x700
[  952.299830]  kthread+0x167/0x1a0
[  952.299835]  ret_from_fork+0x22/0x30
[  952.299839]
[  952.299840] The buggy address belongs to the object at ffff888100337000
[  952.299840]  which belongs to the cache kmalloc-1k of size 1024
[  952.299844] The buggy address is located 0 bytes inside of
[  952.299844]  1024-byte region [ffff888100337000, ffff888100337400)
[  952.299848]
[  952.299849] The buggy address belongs to the physical page:
[  952.299850] page:000000005d3ee749 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x100334
[  952.299857] head:000000005d3ee749 order:2 compound_mapcount:0 compound_pincount:0
[  952.299860] flags: 0x100000000010200(slab|head|node=0|zone=2)
[  952.299868] raw: 0100000000010200 0000000000000000 dead000000000001 ffff888100041dc0
[  952.299872] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000
[  952.299874] page dumped because: kasan: bad access detected
[  952.299876]
[  952.299876] Memory state around the buggy address:
[  952.299878]  ffff888100336f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  952.299882]  ffff888100336f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  952.299885] >ffff888100337000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  952.299887]                    ^
[  952.299889]  ffff888100337080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  952.299892]  ffff888100337100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
This patch is mainly to report the use-after-free bug. It seems the proposal
increasing the IB device's reference count is a bit ugly. Feedbacks and
ideas are very welcome.
---
 drivers/infiniband/ulp/srp/ib_srp.c | 4 ++++
 1 file changed, 4 insertions(+)

Comments

Jason Gunthorpe June 24, 2022, 10:59 p.m. UTC | #1
On Fri, Jun 24, 2022 at 12:02:53PM +0800, Li Zhijian wrote:
> srp_exit_cmd_priv() will try to access srp_device by Scsi_Host like below:
> 
>  Scsi_Host                srp_target_port     srp_host         srp_device
> +------------------+ +-- +--------------+  +>+----------+   +->+---------+
> |                  | |   |              |  | |          |   |  |         |
> |                  | |   |  *srp_host   +--+ | *srp_dev +---+  | *dev    |
> +-+hostdata--------+-+   |              |    |          |      |         |
> | | srp_target_port|     |              |    |          |      |         |
> | |                |     |              |    |          |      |         |
> | |                |     |              |    |          |      |         |
> +-+----------------+---- +--------------+    +----------+      +---------+
> 
> But sometims Scsi_Host still keeps the reference to srp_host that is
> possible released already. This could be happend if i frequently abort
> (Ctrl-c) the blktests during it was running and then cause below error:
> 
> [  952.299153] Freed by task 17289:
> [  952.299156]  kasan_save_stack+0x1e/0x40
> [  952.299160]  kasan_set_track+0x21/0x30
> [  952.299164]  kasan_set_free_info+0x20/0x30
> [  952.299169]  __kasan_slab_free+0x108/0x170
> [  952.299173]  kfree+0x9a/0x320
> [  952.299177]  srp_remove_one+0x114/0x180 [ib_srp]
> [  952.299189]  remove_client_context+0x8f/0xd0 [ib_core]
> [  952.299269]  disable_device+0xee/0x1e0 [ib_core]
> [  952.299348]  __ib_unregister_device+0x59/0xf0 [ib_core]
> [  952.299429]  ib_unregister_device_and_put+0x3b/0x50 [ib_core]
> [  952.299509]  nldev_dellink+0x126/0x1b0 [ib_core]
> [  952.299592]  rdma_nl_rcv_msg+0x1cc/0x310 [ib_core]
> [  952.299673]  rdma_nl_rcv+0x172/0x200 [ib_core]
> [  952.299760]  netlink_unicast+0x36b/0x4a0
> [  952.299770]  netlink_sendmsg+0x3a9/0x6d0
> [  952.299774]  sock_sendmsg+0x91/0xa0
> [  952.299783]  __sys_sendto+0x16f/0x210
> [  952.299788]  __x64_sys_sendto+0x6f/0x80
> [  952.299792]  do_syscall_64+0x3b/0x90
> [  952.299795]  entry_SYSCALL_64_after_hwframe+0x46/0xb0

I don't even understand how get_device() prevents this call chain??

It looks to me like the problem is srp_remove_one() is not waiting for
or canceling some outstanding work.

Jason
Bart Van Assche June 24, 2022, 11:26 p.m. UTC | #2
On 6/24/22 15:59, Jason Gunthorpe wrote:
> I don't even understand how get_device() prevents this call chain??
> 
> It looks to me like the problem is srp_remove_one() is not waiting for
> or canceling some outstanding work.

Hi Jason,

My conclusions from the call traces in Li's email are as follows:
* scsi_host_dev_release() can get called after srp_remove_one().
* srp_exit_cmd_priv() uses the ib_device pointer. If srp_remove_one() is 
called before srp_exit_cmd_priv() then a use-after-free is triggered.

Is calling get_device() and put_device() on the struct ib_device an 
acceptable way to fix this? If so, I recommend to insert a get_device() 
call after the scsi_add_host() call and put_device() calls after the two 
scsi_remove_host() calls instead of merging the patch at the start of 
this email thread.

Thanks,

Bart.
Jason Gunthorpe June 24, 2022, 11:47 p.m. UTC | #3
On Fri, Jun 24, 2022 at 04:26:06PM -0700, Bart Van Assche wrote:
> On 6/24/22 15:59, Jason Gunthorpe wrote:
> > I don't even understand how get_device() prevents this call chain??
> > 
> > It looks to me like the problem is srp_remove_one() is not waiting for
> > or canceling some outstanding work.
> 
> Hi Jason,
> 
> My conclusions from the call traces in Li's email are as follows:
> * scsi_host_dev_release() can get called after srp_remove_one().
> * srp_exit_cmd_priv() uses the ib_device pointer. If srp_remove_one() is
> called before srp_exit_cmd_priv() then a use-after-free is triggered.

Shouldn't srp_remove_one() wait for the scsi_host_dev to complete
destruction? Clearly it cannot continue to exist once the IB device
has been removed

> Is calling get_device() and put_device() on the struct ib_device an
> acceptable way to fix this? 

As I said, I don't understand at all how this works. get_device() does
not prevent srp_remove_one() from being called.

Jason
Bart Van Assche June 24, 2022, 11:57 p.m. UTC | #4
On 6/24/22 16:47, Jason Gunthorpe wrote:
> On Fri, Jun 24, 2022 at 04:26:06PM -0700, Bart Van Assche wrote:
>> On 6/24/22 15:59, Jason Gunthorpe wrote:
>>> I don't even understand how get_device() prevents this call chain??
>>>
>>> It looks to me like the problem is srp_remove_one() is not waiting for
>>> or canceling some outstanding work.
>>
>> Hi Jason,
>>
>> My conclusions from the call traces in Li's email are as follows:
>> * scsi_host_dev_release() can get called after srp_remove_one().
>> * srp_exit_cmd_priv() uses the ib_device pointer. If srp_remove_one() is
>> called before srp_exit_cmd_priv() then a use-after-free is triggered.
> 
> Shouldn't srp_remove_one() wait for the scsi_host_dev to complete
> destruction? Clearly it cannot continue to exist once the IB device
> has been removed

That sounds like an interesting approach to me. Li, do you perhaps want 
to implement this approach?

Thanks,

Bart.
Zhijian Li (Fujitsu) June 27, 2022, 6:47 a.m. UTC | #5
Sorry for the late reply

On 25/06/2022 07:47, Jason Gunthorpe wrote:
> On Fri, Jun 24, 2022 at 04:26:06PM -0700, Bart Van Assche wrote:
>> On 6/24/22 15:59, Jason Gunthorpe wrote:
>>> I don't even understand how get_device() prevents this call chain??
>>>
>>> It looks to me like the problem is srp_remove_one() is not waiting for
>>> or canceling some outstanding work.
>> Hi Jason,
>>
>> My conclusions from the call traces in Li's email are as follows:
>> * scsi_host_dev_release() can get called after srp_remove_one().
>> * srp_exit_cmd_priv() uses the ib_device pointer. If srp_remove_one() is
>> called before srp_exit_cmd_priv() then a use-after-free is triggered.
> Shouldn't srp_remove_one() wait for the scsi_host_dev to complete
> destruction? Clearly it cannot continue to exist once the IB device
> has been removed
Yes, that match my first thought, but i didn't know the exact way to notify scsi side to destroy
itself but scsi_host_put() which already called once in below chains:

srp_remove_one()
  -> srp_queue_remove_work()
     -> srp_remove_target()
        -> scsi_remove_host()
        -> scsi_host_put()

that means scsi_host_dev is still referenced by other components that we have to notify.


>
>> Is calling get_device() and put_device() on the struct ib_device an
>> acceptable way to fix this?
> As I said, I don't understand at all how this works. get_device() does
> not prevent srp_remove_one() from being called.
I originally thought that srp_remove_one was called from put_device(ibdev) ,  so increase its ref_count can avoid it being released early.


Thanks
Zhijian



> Jason
Bart Van Assche June 27, 2022, 5:28 p.m. UTC | #6
On 6/26/22 23:47, lizhijian@fujitsu.com wrote:
> On 25/06/2022 07:47, Jason Gunthorpe wrote:
>> On Fri, Jun 24, 2022 at 04:26:06PM -0700, Bart Van Assche wrote:
>>> On 6/24/22 15:59, Jason Gunthorpe wrote:
>>>> I don't even understand how get_device() prevents this call chain??
>>>>
>>>> It looks to me like the problem is srp_remove_one() is not waiting for
>>>> or canceling some outstanding work.
>>> Hi Jason,
>>>
>>> My conclusions from the call traces in Li's email are as follows:
>>> * scsi_host_dev_release() can get called after srp_remove_one().
>>> * srp_exit_cmd_priv() uses the ib_device pointer. If srp_remove_one() is
>>> called before srp_exit_cmd_priv() then a use-after-free is triggered.
>> Shouldn't srp_remove_one() wait for the scsi_host_dev to complete
>> destruction? Clearly it cannot continue to exist once the IB device
>> has been removed
> Yes, that match my first thought, but i didn't know the exact way to notify scsi side to destroy
> itself but scsi_host_put() which already called once in below chains:
> 
> srp_remove_one()
>    -> srp_queue_remove_work()
>       -> srp_remove_target()
>          -> scsi_remove_host()
>          -> scsi_host_put()
> 
> that means scsi_host_dev is still referenced by other components that we have to notify.

How about the patch below (should be sent to the SCSI maintainer)?


Subject: [PATCH] scsi: core: Call blk_mq_free_tag_set() earlier

scsi_mq_exit_request() is called by blk_mq_free_tag_set(). Since
scsi_mq_exit_request() implementations may need resources that are freed
after scsi_remove_host() has been called and before the host reference
count drops to zero, call blk_mq_free_tag_set() before the host
reference count drops to zero. blk_mq_free_tag_set() can be called
immediately after scsi_forget_host() has returned since scsi_forget_host()
drains all the request queues that use the host tag set.

This patch fixes the following use-after-free:

==================================================================
BUG: KASAN: use-after-free in srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
Read of size 8 at addr ffff888100337000 by task multipathd/16727

CPU: 0 PID: 16727 Comm: multipathd Not tainted 5.19.0-rc1-roce-flush+ #78
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-27-g64f37cc530f1-prebuilt.qemu.org 04/01/2014
Call Trace:
  <TASK>
  dump_stack_lvl+0x34/0x44
  print_report.cold+0x5e/0x5db
  kasan_report+0xab/0x120
  srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
  scsi_mq_exit_request+0x4d/0x70
  blk_mq_free_rqs+0x143/0x410
  __blk_mq_free_map_and_rqs+0x6e/0x100
  blk_mq_free_tag_set+0x2b/0x160
  scsi_host_dev_release+0xf3/0x1a0
  device_release+0x54/0xe0
  kobject_put+0xa5/0x120
  device_release+0x54/0xe0
  kobject_put+0xa5/0x120
  scsi_device_dev_release_usercontext+0x4c1/0x4e0
  execute_in_process_context+0x23/0x90
  device_release+0x54/0xe0
  kobject_put+0xa5/0x120
  scsi_disk_release+0x3f/0x50
  device_release+0x54/0xe0
  kobject_put+0xa5/0x120
  disk_release+0x17f/0x1b0
  device_release+0x54/0xe0
  kobject_put+0xa5/0x120
  dm_put_table_device+0xa3/0x160 [dm_mod]
  dm_put_device+0xd0/0x140 [dm_mod]
  free_priority_group+0xd8/0x110 [dm_multipath]
  free_multipath+0x94/0xe0 [dm_multipath]
  dm_table_destroy+0xa2/0x1e0 [dm_mod]
  __dm_destroy+0x196/0x350 [dm_mod]
  dev_remove+0x10c/0x160 [dm_mod]
  ctl_ioctl+0x2c2/0x590 [dm_mod]
  dm_ctl_ioctl+0x5/0x10 [dm_mod]
  __x64_sys_ioctl+0xb4/0xf0
  dm_ctl_ioctl+0x5/0x10 [dm_mod]
  __x64_sys_ioctl+0xb4/0xf0
  do_syscall_64+0x3b/0x90
  entry_SYSCALL_64_after_hwframe+0x46/0xb0

Reported-by: Li Zhijian <lizhijian@fujitsu.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Fixes: 65ca846a5314 ("scsi: core: Introduce {init,exit}_cmd_priv()")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  drivers/scsi/hosts.c    | 10 +++++-----
  drivers/scsi/scsi_lib.c |  3 +++
  2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ef6c0e37acce..74bfa187fe19 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -182,6 +182,8 @@ void scsi_remove_host(struct Scsi_Host *shost)
  	mutex_unlock(&shost->scan_mutex);
  	scsi_proc_host_rm(shost);

+	scsi_mq_destroy_tags(shost);
+
  	spin_lock_irqsave(shost->host_lock, flags);
  	if (scsi_host_set_state(shost, SHOST_DEL))
  		BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY));
@@ -295,8 +297,8 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
  	return error;

  	/*
-	 * Any host allocation in this function will be freed in
-	 * scsi_host_dev_release().
+	 * Any resources associated with the SCSI host in this function except
+	 * the tag set will be freed by scsi_host_dev_release().
  	 */
   out_del_dev:
  	device_del(&shost->shost_dev);
@@ -312,6 +314,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
  	pm_runtime_disable(&shost->shost_gendev);
  	pm_runtime_set_suspended(&shost->shost_gendev);
  	pm_runtime_put_noidle(&shost->shost_gendev);
+	scsi_mq_destroy_tags(shost);
   fail:
  	return error;
  }
@@ -345,9 +348,6 @@ static void scsi_host_dev_release(struct device *dev)
  		kfree(dev_name(&shost->shost_dev));
  	}

-	if (shost->tag_set.tags)
-		scsi_mq_destroy_tags(shost);
-
  	kfree(shost->shost_data);

  	ida_free(&host_index_ida, shost->host_no);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6ffc9e4258a8..1aa1a279f8f3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1990,7 +1990,10 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)

  void scsi_mq_destroy_tags(struct Scsi_Host *shost)
  {
+	if (!shost->tag_set.tags)
+		return;
  	blk_mq_free_tag_set(&shost->tag_set);
+	WARN_ON_ONCE(shost->tag_set.tags);
  }

  /**
Zhijian Li (Fujitsu) June 28, 2022, 4:41 a.m. UTC | #7
On 28/06/2022 01:28, Bart Van Assche wrote:
> How about the patch below (should be sent to the SCSI maintainer)?
>
>
> Subject: [PATCH] scsi: core: Call blk_mq_free_tag_set() earlier
>
> scsi_mq_exit_request() is called by blk_mq_free_tag_set(). Since
> scsi_mq_exit_request() implementations may need resources that are freed
> after scsi_remove_host() has been called and before the host reference
> count drops to zero, call blk_mq_free_tag_set() before the host
> reference count drops to zero. blk_mq_free_tag_set() can be called
> immediately after scsi_forget_host() has returned since scsi_forget_host()
> drains all the request queues that use the host tag set.
>
> This patch fixes the following use-after-free:
>
> ==================================================================
> BUG: KASAN: use-after-free in srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
> Read of size 8 at addr ffff888100337000 by task multipathd/16727
>
> CPU: 0 PID: 16727 Comm: multipathd Not tainted 5.19.0-rc1-roce-flush+ #78
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-27-g64f37cc530f1-prebuilt.qemu.org 04/01/2014
> Call Trace:
>  <TASK>
>  dump_stack_lvl+0x34/0x44
>  print_report.cold+0x5e/0x5db
>  kasan_report+0xab/0x120
>  srp_exit_cmd_priv+0x27/0xd0 [ib_srp]
>  scsi_mq_exit_request+0x4d/0x70
>  blk_mq_free_rqs+0x143/0x410
>  __blk_mq_free_map_and_rqs+0x6e/0x100
>  blk_mq_free_tag_set+0x2b/0x160
>  scsi_host_dev_release+0xf3/0x1a0
>  device_release+0x54/0xe0
>  kobject_put+0xa5/0x120
>  device_release+0x54/0xe0
>  kobject_put+0xa5/0x120
>  scsi_device_dev_release_usercontext+0x4c1/0x4e0
>  execute_in_process_context+0x23/0x90
>  device_release+0x54/0xe0
>  kobject_put+0xa5/0x120
>  scsi_disk_release+0x3f/0x50
>  device_release+0x54/0xe0
>  kobject_put+0xa5/0x120
>  disk_release+0x17f/0x1b0
>  device_release+0x54/0xe0
>  kobject_put+0xa5/0x120
>  dm_put_table_device+0xa3/0x160 [dm_mod]
>  dm_put_device+0xd0/0x140 [dm_mod]
>  free_priority_group+0xd8/0x110 [dm_multipath]
>  free_multipath+0x94/0xe0 [dm_multipath]
>  dm_table_destroy+0xa2/0x1e0 [dm_mod]
>  __dm_destroy+0x196/0x350 [dm_mod]
>  dev_remove+0x10c/0x160 [dm_mod]
>  ctl_ioctl+0x2c2/0x590 [dm_mod]
>  dm_ctl_ioctl+0x5/0x10 [dm_mod]
>  __x64_sys_ioctl+0xb4/0xf0
>  dm_ctl_ioctl+0x5/0x10 [dm_mod]
>  __x64_sys_ioctl+0xb4/0xf0
>  do_syscall_64+0x3b/0x90
>  entry_SYSCALL_64_after_hwframe+0x46/0xb0
>
> Reported-by: Li Zhijian <lizhijian@fujitsu.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: John Garry <john.garry@huawei.com>
> Fixes: 65ca846a5314 ("scsi: core: Introduce {init,exit}_cmd_priv()")
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

Awesome, It works for me.

Tested-by: Li Zhijian<lizhijian@fujitsu.com>
Bart Van Assche June 28, 2022, 9:56 p.m. UTC | #8
On 6/27/22 21:41, lizhijian@fujitsu.com wrote:
> Awesome, It works for me.
> 
> Tested-by: Li Zhijian<lizhijian@fujitsu.com>

Thanks for having tested this patch :-) This patch has been posted on the
linux-scsi mailing list. See also
https://lore.kernel.org/linux-scsi/20220628175612.2157218-1-bvanassche@acm.org/T/#u

Bart.
diff mbox series

Patch

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 6058abf42ba7..5981a0ea7a19 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -979,6 +979,8 @@  static int srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 				    DMA_TO_DEVICE);
 	}
 	kfree(req->indirect_desc);
+	/* pair with get_device() in srp_init_cmd_priv() */
+	put_device(&ibdev->dev);
 
 	return 0;
 }
@@ -1006,12 +1008,14 @@  static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 				     target->indirect_size,
 				     DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(ibdev, dma_addr)) {
+		get_device(&ibdev->dev);
 		srp_exit_cmd_priv(shost, cmd);
 		goto out;
 	}
 
 	req->indirect_dma_addr = dma_addr;
 	ret = 0;
+	get_device(&ibdev->dev);
 
 out:
 	return ret;