diff mbox series

s390/cio: Refactor alloc of vfio_ccw_private

Message ID 20180920151934.35792-1-farman@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series s390/cio: Refactor alloc of vfio_ccw_private | expand

Commit Message

Eric Farman Sept. 20, 2018, 3:19 p.m. UTC
If I attach a vfio-ccw device to my guest, I get the following warning
on the host when the host kernel is built with CONFIG_HARDENED_USERCOPY=y:

[250757.595325] Bad or missing usercopy whitelist? Kernel memory overwrite attempt detected to SLUB object 'dma-kmalloc-512' (offset 64, size 124)!
[250757.595365] WARNING: CPU: 2 PID: 10958 at mm/usercopy.c:81 usercopy_warn+0xac/0xd8
[250757.595369] Modules linked in: kvm vhost_net vhost tap xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c devlink tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc dm_multipath s390_trng crc32_vx_s390 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha1_s390 eadm_sch tape_3590 tape tape_class qeth_l2 qeth ccwgroup vfio_ccw vfio_mdev zcrypt_cex4 mdev vfio_iommu_type1 zcrypt vfio sha256_s390 sha_common zfcp scsi_transport_fc qdio dasd_eckd_mod dasd_mod
[250757.595424] CPU: 2 PID: 10958 Comm: CPU 2/KVM Not tainted 4.18.0-derp #2
[250757.595426] Hardware name: IBM 3906 M05 780 (LPAR)
...snip regs...
[250757.595523] Call Trace:
[250757.595529] ([<0000000000349210>] usercopy_warn+0xa8/0xd8)
[250757.595535]  [<000000000032daaa>] __check_heap_object+0xfa/0x160
[250757.595540]  [<0000000000349396>] __check_object_size+0x156/0x1d0
[250757.595547]  [<000003ff80332d04>] vfio_ccw_mdev_write+0x74/0x148 [vfio_ccw]
[250757.595552]  [<000000000034ed12>] __vfs_write+0x3a/0x188
[250757.595556]  [<000000000034f040>] vfs_write+0xa8/0x1b8
[250757.595559]  [<000000000034f4e6>] ksys_pwrite64+0x86/0xc0
[250757.595568]  [<00000000008959a0>] system_call+0xdc/0x2b0
[250757.595570] Last Breaking-Event-Address:
[250757.595573]  [<0000000000349210>] usercopy_warn+0xa8/0xd8

While vfio_ccw_mdev_{write|read} validates that the input position/count
does not run over the ccw_io_region struct, the usercopy code that does
copy_{to|from}_user doesn't necessarily know this. It sees the variable
length and gets worried that it's affecting a normal kmalloc'd struct,
and generates the above warning.

Adjust how the vfio_ccw_private struct is allocated, using a dedicated
slab cache with a usercopy whitelist for the io_region (a ccw_io_region)
within it, to remove this warning. The boundary checking will continue
to do its thing.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
---
 drivers/s390/cio/vfio_ccw_drv.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

Comments

Cornelia Huck Sept. 21, 2018, 11:56 a.m. UTC | #1
On Thu, 20 Sep 2018 17:19:34 +0200
Eric Farman <farman@linux.ibm.com> wrote:

> If I attach a vfio-ccw device to my guest, I get the following warning
> on the host when the host kernel is CONFIG_HARDENED_USERCOPY=y

Maybe I should try building with that on my systems as well :)

> 
> [250757.595325] Bad or missing usercopy whitelist? Kernel memory overwrite attempt detected to SLUB object 'dma-kmalloc-512' (offset 64, size 124)!
> [250757.595365] WARNING: CPU: 2 PID: 10958 at mm/usercopy.c:81 usercopy_warn+0xac/0xd8
> [250757.595369] Modules linked in: kvm vhost_net vhost tap xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c devlink tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc dm_multipath s390_trng crc32_vx_s390 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha1_s390 eadm_sch tape_3590 tape tape_class qeth_l2 qeth ccwgroup vfio_ccw vfio_mdev zcrypt_cex4 mdev vfio_iommu_type1 zcrypt vfio sha256_s390 sha_common zfcp scsi_transport_fc qdio dasd_eckd_mod dasd_mod
> [250757.595424] CPU: 2 PID: 10958 Comm: CPU 2/KVM Not tainted 4.18.0-derp #2
> [250757.595426] Hardware name: IBM 3906 M05 780 (LPAR)
> ...snip regs...
> [250757.595523] Call Trace:
> [250757.595529] ([<0000000000349210>] usercopy_warn+0xa8/0xd8)
> [250757.595535]  [<000000000032daaa>] __check_heap_object+0xfa/0x160
> [250757.595540]  [<0000000000349396>] __check_object_size+0x156/0x1d0
> [250757.595547]  [<000003ff80332d04>] vfio_ccw_mdev_write+0x74/0x148 [vfio_ccw]
> [250757.595552]  [<000000000034ed12>] __vfs_write+0x3a/0x188
> [250757.595556]  [<000000000034f040>] vfs_write+0xa8/0x1b8
> [250757.595559]  [<000000000034f4e6>] ksys_pwrite64+0x86/0xc0
> [250757.595568]  [<00000000008959a0>] system_call+0xdc/0x2b0
> [250757.595570] Last Breaking-Event-Address:
> [250757.595573]  [<0000000000349210>] usercopy_warn+0xa8/0xd8
> 
> While vfio_ccw_mdev_{write|read} validates that the input position/count
> does not run over the ccw_io_region struct, the usercopy code that does
> copy_{to|from}_user doesn't necessarily know this. It sees the variable
> length and gets worried that it's affecting a normal kmalloc'd struct,
> and generates the above warning.
> 
> Adjust how the vfio_ccw_struct is alloc'd, with a whitelist for the
> ccw_io_region within it, to remove this warning. The boundary checking
> will continue to do its thing.
> 
> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> ---
>  drivers/s390/cio/vfio_ccw_drv.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index 770fa9cfc310..8191adbf3490 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -22,6 +22,10 @@
>  #include "vfio_ccw_private.h"
>  
>  struct workqueue_struct *vfio_ccw_work_q;
> +struct kmem_cache *vfio_private_cache;
> +
> +#define IOREGION_OFFSET offsetof(struct vfio_ccw_private, io_region)
> +#define IOREGION_SIZE sizeof_field(struct vfio_ccw_private, io_region)
>  
>  /*
>   * Helpers
> @@ -111,7 +115,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>  		return -ENODEV;
>  	}
>  
> -	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
> +	private = kmem_cache_zalloc(vfio_private_cache, GFP_KERNEL | GFP_DMA);
>  	if (!private)
>  		return -ENOMEM;
>  	private->sch = sch;
> @@ -139,7 +143,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>  	cio_disable_subchannel(sch);
>  out_free:
>  	dev_set_drvdata(&sch->dev, NULL);
> -	kfree(private);
> +	kmem_cache_free(vfio_private_cache, private);
>  	return ret;
>  }
>  
> @@ -153,7 +157,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
>  
>  	dev_set_drvdata(&sch->dev, NULL);
>  
> -	kfree(private);
> +	kmem_cache_free(vfio_private_cache, private);
>  
>  	return 0;
>  }
> @@ -232,10 +236,20 @@ static int __init vfio_ccw_sch_init(void)
>  	if (!vfio_ccw_work_q)
>  		return -ENOMEM;
>  
> +	vfio_private_cache = kmem_cache_create_usercopy("vfio_ccw_private",
> +					sizeof(struct vfio_ccw_private),
> +					0, SLAB_ACCOUNT, IOREGION_OFFSET,
> +					IOREGION_SIZE, NULL);

That should work fine, but I'm currently (...) trying to add more
regions (for example, for halt/clear handling) and I'm wondering
whether we should change how we allocate our I/O regions, for example
using a dedicated region that is pointed to by the private structure.
Thoughts?

> +	if (!vfio_private_cache) {
> +		destroy_workqueue(vfio_ccw_work_q);
> +		return -ENOMEM;
> +	}
> +
>  	isc_register(VFIO_CCW_ISC);
>  	ret = css_driver_register(&vfio_ccw_sch_driver);
>  	if (ret) {
>  		isc_unregister(VFIO_CCW_ISC);
> +		kmem_cache_destroy(vfio_private_cache);
>  		destroy_workqueue(vfio_ccw_work_q);
>  	}
>  
> @@ -246,6 +260,7 @@ static void __exit vfio_ccw_sch_exit(void)
>  {
>  	css_driver_unregister(&vfio_ccw_sch_driver);
>  	isc_unregister(VFIO_CCW_ISC);
> +	kmem_cache_destroy(vfio_private_cache);
>  	destroy_workqueue(vfio_ccw_work_q);
>  }
>  module_init(vfio_ccw_sch_init);
Eric Farman Sept. 21, 2018, 1:40 p.m. UTC | #2
On 09/21/2018 07:56 AM, Cornelia Huck wrote:
> On Thu, 20 Sep 2018 17:19:34 +0200
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> If I attach a vfio-ccw device to my guest, I get the following warning
>> on the host when the host kernel is CONFIG_HARDENED_USERCOPY=y
> 
> Maybe I should try building with that on my systems as well :)

:)

> 
>>
>> [250757.595325] Bad or missing usercopy whitelist? Kernel memory overwrite attempt detected to SLUB object 'dma-kmalloc-512' (offset 64, size 124)!
>> [250757.595365] WARNING: CPU: 2 PID: 10958 at mm/usercopy.c:81 usercopy_warn+0xac/0xd8
>> [250757.595369] Modules linked in: kvm vhost_net vhost tap xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c devlink tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc dm_multipath s390_trng crc32_vx_s390 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha1_s390 eadm_sch tape_3590 tape tape_class qeth_l2 qeth ccwgroup vfio_ccw vfio_mdev zcrypt_cex4 mdev vfio_iommu_type1 zcrypt vfio sha256_s390 sha_common zfcp scsi_transport_fc qdio dasd_eckd_mod dasd_mod
>> [250757.595424] CPU: 2 PID: 10958 Comm: CPU 2/KVM Not tainted 4.18.0-derp #2
>> [250757.595426] Hardware name: IBM 3906 M05 780 (LPAR)
>> ...snip regs...
>> [250757.595523] Call Trace:
>> [250757.595529] ([<0000000000349210>] usercopy_warn+0xa8/0xd8)
>> [250757.595535]  [<000000000032daaa>] __check_heap_object+0xfa/0x160
>> [250757.595540]  [<0000000000349396>] __check_object_size+0x156/0x1d0
>> [250757.595547]  [<000003ff80332d04>] vfio_ccw_mdev_write+0x74/0x148 [vfio_ccw]
>> [250757.595552]  [<000000000034ed12>] __vfs_write+0x3a/0x188
>> [250757.595556]  [<000000000034f040>] vfs_write+0xa8/0x1b8
>> [250757.595559]  [<000000000034f4e6>] ksys_pwrite64+0x86/0xc0
>> [250757.595568]  [<00000000008959a0>] system_call+0xdc/0x2b0
>> [250757.595570] Last Breaking-Event-Address:
>> [250757.595573]  [<0000000000349210>] usercopy_warn+0xa8/0xd8
>>
>> While vfio_ccw_mdev_{write|read} validates that the input position/count
>> does not run over the ccw_io_region struct, the usercopy code that does
>> copy_{to|from}_user doesn't necessarily know this. It sees the variable
>> length and gets worried that it's affecting a normal kmalloc'd struct,
>> and generates the above warning.
>>
>> Adjust how the vfio_ccw_struct is alloc'd, with a whitelist for the
>> ccw_io_region within it, to remove this warning. The boundary checking
>> will continue to do its thing.
>>
>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>> ---
>>   drivers/s390/cio/vfio_ccw_drv.c | 21 ++++++++++++++++++---
>>   1 file changed, 18 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
>> index 770fa9cfc310..8191adbf3490 100644
>> --- a/drivers/s390/cio/vfio_ccw_drv.c
>> +++ b/drivers/s390/cio/vfio_ccw_drv.c
>> @@ -22,6 +22,10 @@
>>   #include "vfio_ccw_private.h"
>>   
>>   struct workqueue_struct *vfio_ccw_work_q;
>> +struct kmem_cache *vfio_private_cache;
>> +
>> +#define IOREGION_OFFSET offsetof(struct vfio_ccw_private, io_region)
>> +#define IOREGION_SIZE sizeof_field(struct vfio_ccw_private, io_region)
>>   
>>   /*
>>    * Helpers
>> @@ -111,7 +115,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>>   		return -ENODEV;
>>   	}
>>   
>> -	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
>> +	private = kmem_cache_zalloc(vfio_private_cache, GFP_KERNEL | GFP_DMA);
>>   	if (!private)
>>   		return -ENOMEM;
>>   	private->sch = sch;
>> @@ -139,7 +143,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>>   	cio_disable_subchannel(sch);
>>   out_free:
>>   	dev_set_drvdata(&sch->dev, NULL);
>> -	kfree(private);
>> +	kmem_cache_free(vfio_private_cache, private);
>>   	return ret;
>>   }
>>   
>> @@ -153,7 +157,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
>>   
>>   	dev_set_drvdata(&sch->dev, NULL);
>>   
>> -	kfree(private);
>> +	kmem_cache_free(vfio_private_cache, private);
>>   
>>   	return 0;
>>   }
>> @@ -232,10 +236,20 @@ static int __init vfio_ccw_sch_init(void)
>>   	if (!vfio_ccw_work_q)
>>   		return -ENOMEM;
>>   
>> +	vfio_private_cache = kmem_cache_create_usercopy("vfio_ccw_private",
>> +					sizeof(struct vfio_ccw_private),
>> +					0, SLAB_ACCOUNT, IOREGION_OFFSET,
>> +					IOREGION_SIZE, NULL);
> 
> That should work fine, but I'm currently (...) trying to add more
> regions (for example, for halt/clear handling) and I'm wondering
> whether we should change how we allocate our I/O regions, for example
> using a dedicated region that is pointed to by the private structure.
> Thoughts?

That would definitely make this a bit more future-proof.  What would be
in the new regions that's not in the ccw_io_region already?  (Which is
an orb and an irb, and for some reason another scsw.)

> 
>> +	if (!vfio_private_cache) {
>> +		destroy_workqueue(vfio_ccw_work_q);
>> +		return -ENOMEM;
>> +	}
>> +
>>   	isc_register(VFIO_CCW_ISC);
>>   	ret = css_driver_register(&vfio_ccw_sch_driver);
>>   	if (ret) {
>>   		isc_unregister(VFIO_CCW_ISC);
>> +		kmem_cache_destroy(vfio_private_cache);
>>   		destroy_workqueue(vfio_ccw_work_q);
>>   	}
>>   
>> @@ -246,6 +260,7 @@ static void __exit vfio_ccw_sch_exit(void)
>>   {
>>   	css_driver_unregister(&vfio_ccw_sch_driver);
>>   	isc_unregister(VFIO_CCW_ISC);
>> +	kmem_cache_destroy(vfio_private_cache);
>>   	destroy_workqueue(vfio_ccw_work_q);
>>   }
>>   module_init(vfio_ccw_sch_init);
>
Cornelia Huck Sept. 24, 2018, 9:21 a.m. UTC | #3
On Fri, 21 Sep 2018 09:40:09 -0400
Eric Farman <farman@linux.ibm.com> wrote:

> On 09/21/2018 07:56 AM, Cornelia Huck wrote:
> > On Thu, 20 Sep 2018 17:19:34 +0200
> > Eric Farman <farman@linux.ibm.com> wrote:

> >> +	vfio_private_cache = kmem_cache_create_usercopy("vfio_ccw_private",
> >> +					sizeof(struct vfio_ccw_private),
> >> +					0, SLAB_ACCOUNT, IOREGION_OFFSET,
> >> +					IOREGION_SIZE, NULL);  
> > 
> > That should work fine, but I'm currently (...) trying to add more
> > regions (for example, for halt/clear handling) and I'm wondering
> > whether we should change how we allocate our I/O regions, for example
> > using a dedicated region that is pointed to by the private structure.
> > Thoughts?  
> 
> That would definitely make this a bit more future proof.  What would be 
> in the new regions, that's not in the ccw_io_region already?  (Which is 
> an orb and an irb, and for some reason another scsw).

The idea is not to include more data (at least for my current use
case), but rather to switch to a structure that allows user space to
specify a command (and sidestep the whole question about whether the
scsw is a real scsw etc.). We'll keep the existing region for ssch, but
I have something that is nearly ready that introduces a new structure,
guarded by a capability chain, that is used for handling hsch/csch (and
that I'll post if I ever find a spare minute). Other possible uses are
path handling and other things.
diff mbox series

Patch

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 770fa9cfc310..8191adbf3490 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -22,6 +22,10 @@ 
 #include "vfio_ccw_private.h"
 
 struct workqueue_struct *vfio_ccw_work_q;
+struct kmem_cache *vfio_private_cache;
+
+#define IOREGION_OFFSET offsetof(struct vfio_ccw_private, io_region)
+#define IOREGION_SIZE sizeof_field(struct vfio_ccw_private, io_region)
 
 /*
  * Helpers
@@ -111,7 +115,7 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 		return -ENODEV;
 	}
 
-	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+	private = kmem_cache_zalloc(vfio_private_cache, GFP_KERNEL | GFP_DMA);
 	if (!private)
 		return -ENOMEM;
 	private->sch = sch;
@@ -139,7 +143,7 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 	cio_disable_subchannel(sch);
 out_free:
 	dev_set_drvdata(&sch->dev, NULL);
-	kfree(private);
+	kmem_cache_free(vfio_private_cache, private);
 	return ret;
 }
 
@@ -153,7 +157,7 @@  static int vfio_ccw_sch_remove(struct subchannel *sch)
 
 	dev_set_drvdata(&sch->dev, NULL);
 
-	kfree(private);
+	kmem_cache_free(vfio_private_cache, private);
 
 	return 0;
 }
@@ -232,10 +236,20 @@  static int __init vfio_ccw_sch_init(void)
 	if (!vfio_ccw_work_q)
 		return -ENOMEM;
 
+	vfio_private_cache = kmem_cache_create_usercopy("vfio_ccw_private",
+					sizeof(struct vfio_ccw_private),
+					0, SLAB_ACCOUNT, IOREGION_OFFSET,
+					IOREGION_SIZE, NULL);
+	if (!vfio_private_cache) {
+		destroy_workqueue(vfio_ccw_work_q);
+		return -ENOMEM;
+	}
+
 	isc_register(VFIO_CCW_ISC);
 	ret = css_driver_register(&vfio_ccw_sch_driver);
 	if (ret) {
 		isc_unregister(VFIO_CCW_ISC);
+		kmem_cache_destroy(vfio_private_cache);
 		destroy_workqueue(vfio_ccw_work_q);
 	}
 
@@ -246,6 +260,7 @@  static void __exit vfio_ccw_sch_exit(void)
 {
 	css_driver_unregister(&vfio_ccw_sch_driver);
 	isc_unregister(VFIO_CCW_ISC);
+	kmem_cache_destroy(vfio_private_cache);
 	destroy_workqueue(vfio_ccw_work_q);
 }
 module_init(vfio_ccw_sch_init);