diff mbox series

[v4,09/10] vfio/fsl-mc: Add read/write support for fsl-mc devices

Message ID 20200826093315.5279-10-diana.craciun@oss.nxp.com (mailing list archive)
State New, archived
Headers show
Series vfio/fsl-mc: VFIO support for FSL-MC device | expand

Commit Message

Diana Madalina Craciun Aug. 26, 2020, 9:33 a.m. UTC
The software uses a memory-mapped I/O command interface (MC portals) to
communicate with the MC hardware. This command interface is used to
discover, enumerate, configure and remove DPAA2 objects. The DPAA2
objects use MSIs, so the command interface needs to be emulated
such that the correct MSI is configured in the hardware (the guest
has the virtual MSIs).

This patch is adding read/write support for fsl-mc devices. The mc
commands are emulated by the userspace. The host is just passing
the correct command to the hardware.

Also the current patch limits userspace to write complete
64byte command once and read 64byte response by one ioctl.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
---
 drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 115 +++++++++++++++++++++-
 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   1 +
 2 files changed, 114 insertions(+), 2 deletions(-)

Comments

Eric Auger Sept. 4, 2020, 8:18 a.m. UTC | #1
Hi Diana,

On 8/26/20 11:33 AM, Diana Craciun wrote:
> The software uses a memory-mapped I/O command interface (MC portals) to
> communicate with the MC hardware. This command interface is used to
> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
> objects use MSIs, so the command interface needs to be emulated
> such that the correct MSI is configured in the hardware (the guest
> has the virtual MSIs).
What I don't get is all the regions are mmappable too.
And this patch does not seem to introduce special handling with respect
to MSIs. Please could you clarify?
> 
> This patch is adding read/write support for fsl-mc devices. The mc
> commands are emulated by the userspace. The host is just passing
> the correct command to the hardware.
> 
> Also the current patch limits userspace to write complete
> 64byte command once and read 64byte response by one ioctl.
> 
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
> Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
> ---
>  drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 115 +++++++++++++++++++++-
>  drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   1 +
>  2 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> index 73834f488a94..27713aa86878 100644
> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> @@ -12,6 +12,7 @@
>  #include <linux/types.h>
>  #include <linux/vfio.h>
>  #include <linux/fsl/mc.h>
> +#include <linux/delay.h>
>  
>  #include "vfio_fsl_mc_private.h"
>  
> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>  		vdev->regions[i].size = resource_size(res);
>  		vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>  		vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
> +		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
> +		if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
> +			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>  	}
>  
>  	vdev->num_regions = mc_dev->obj_desc.region_count;
> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>  
>  static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
>  {
> +	int i;
> +
> +	for (i = 0; i < vdev->num_regions; i++)
> +		iounmap(vdev->regions[i].ioaddr);
> +
>  	vdev->num_regions = 0;
>  	kfree(vdev->regions);
>  }
> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
>  static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>  				size_t count, loff_t *ppos)
>  {
> -	return -EINVAL;
> +	struct vfio_fsl_mc_device *vdev = device_data;
> +	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
> +	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
> +	struct vfio_fsl_mc_region *region;
> +	u64 data[8];
> +	int i;
> +
> +	if (index >= vdev->num_regions)
> +		return -EINVAL;
> +
> +	region = &vdev->regions[index];
> +
> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
> +		return -EINVAL;
> +
> +	if (!region->ioaddr) {
> +		region->ioaddr = ioremap(region->addr, region->size);
> +		if (!region->ioaddr)
> +			return -ENOMEM;
> +	}
> +
> +	if (count != 64 || off != 0)
> +		return -EINVAL;
> +
> +	for (i = 7; i >= 0; i--)
> +		data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
> +
> +	if (copy_to_user(buf, data, 64))
> +		return -EFAULT;
> +
> +	return count;
> +}
> +
> +#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
> +
> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
> +{
> +	int i;
> +	enum mc_cmd_status status;
> +	unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
> +
> +	/* Write at command parameter into portal */
> +	for (i = 7; i >= 1; i--)
> +		writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
> +
> +	/* Write command header in the end */
> +	writeq(cmd_data[0], ioaddr);
> +
> +	/* Wait for response before returning to user-space
> +	 * This can be optimized in future to even prepare response
> +	 * before returning to user-space and avoid read ioctl.
> +	 */
> +	for (;;) {
> +		u64 header;
> +		struct mc_cmd_header *resp_hdr;
> +
> +		header = cpu_to_le64(readq_relaxed(ioaddr));
> +
> +		resp_hdr = (struct mc_cmd_header *)&header;
> +		status = (enum mc_cmd_status)resp_hdr->status;
> +		if (status != MC_CMD_STATUS_READY)
> +			break;
> +
> +		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
> +		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
> +		if (timeout_usecs == 0)
> +			return -ETIMEDOUT;
> +	}
> +
> +	return 0;
>  }
>  
>  static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
>  				 size_t count, loff_t *ppos)
>  {
> -	return -EINVAL;
> +	struct vfio_fsl_mc_device *vdev = device_data;
> +	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
> +	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
> +	struct vfio_fsl_mc_region *region;
> +	u64 data[8];
> +	int ret;
> +
> +	if (index >= vdev->num_regions)
> +		return -EINVAL;
> +
> +	region = &vdev->regions[index];
> +
> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
> +		return -EINVAL;
> +
> +	if (!region->ioaddr) {
> +		region->ioaddr = ioremap(region->addr, region->size);
> +		if (!region->ioaddr)
> +			return -ENOMEM;
> +	}
> +
> +	if (count != 64 || off != 0)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&data, buf, 64))
> +		return -EFAULT;
> +
> +	ret = vfio_fsl_mc_send_command(region->ioaddr, data);
> +	if (ret)
> +		return ret;
> +
> +	return count;
> +
>  }
>  
>  static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> index bbfca8b55f8a..e6804e516c4a 100644
> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>  	u32			type;
>  	u64			addr;
>  	resource_size_t		size;
> +	void __iomem		*ioaddr;
>  };
>  
>  struct vfio_fsl_mc_device {
> 
Thanks

Eric
Diana Madalina Craciun Sept. 7, 2020, 2:34 p.m. UTC | #2
Hi Eric,

On 9/4/2020 11:18 AM, Auger Eric wrote:
> Hi Diana,
> 
> On 8/26/20 11:33 AM, Diana Craciun wrote:
>> The software uses a memory-mapped I/O command interface (MC portals) to
>> communicate with the MC hardware. This command interface is used to
>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>> objects use MSIs, so the command interface needs to be emulated
>> such that the correct MSI is configured in the hardware (the guest
>> has the virtual MSIs).
> What I don't get is all the regions are mmappable too.
> And this patch does not seem to introduce special handling with respect
> to MSIs. Please could you clarify?

The device can be controlled using commands issued towards a firmware. 
Most of the commands can be passthrough, among exceptions is the command 
that configures the interrupts. In a guest the interrupts are emulated 
and for the hardware the numbers in the guest mean nothing. So, in a 
virtual machine scenario, the DPMCP and DPRC regions are emulated in 
qemu such that the command which configures the interrupts will not go 
to hardware with the information set by the guest.
However there are other scenarios apart from virtual machines like DPDK 
in which the interrupt configuration command is not used. The problem 
might be that the userspace could issue the command because there is no 
restriction in the VFIO, but in that case the worst thing that may 
happen is for the interrupts for the device not to work.
However it is possible to restrict the command for this scenario as well 
if I change the code and not allow the DPRC region to be mmapable. In 
practice it proved that it might not gain much by direct assigning that 
area. Also the interrupt configuration command was restricted from the 
firmware to be issued only from the DPRC device region to help such a 
scenario.


>>
>> This patch is adding read/write support for fsl-mc devices. The mc
>> commands are emulated by the userspace. The host is just passing
>> the correct command to the hardware.
>>
>> Also the current patch limits userspace to write complete
>> 64byte command once and read 64byte response by one ioctl.
>>
>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
>> Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
>> ---
>>   drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 115 +++++++++++++++++++++-
>>   drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   1 +
>>   2 files changed, 114 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> index 73834f488a94..27713aa86878 100644
>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> @@ -12,6 +12,7 @@
>>   #include <linux/types.h>
>>   #include <linux/vfio.h>
>>   #include <linux/fsl/mc.h>
>> +#include <linux/delay.h>
>>   
>>   #include "vfio_fsl_mc_private.h"
>>   
>> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>>   		vdev->regions[i].size = resource_size(res);
>>   		vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>>   		vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
>> +		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>> +		if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>> +			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>>   	}
>>   
>>   	vdev->num_regions = mc_dev->obj_desc.region_count;
>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>>   
>>   static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
>>   {
>> +	int i;
>> +
>> +	for (i = 0; i < vdev->num_regions; i++)
>> +		iounmap(vdev->regions[i].ioaddr);
>> +
>>   	vdev->num_regions = 0;
>>   	kfree(vdev->regions);
>>   }
>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
>>   static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>>   				size_t count, loff_t *ppos)
>>   {
>> -	return -EINVAL;
>> +	struct vfio_fsl_mc_device *vdev = device_data;
>> +	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>> +	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>> +	struct vfio_fsl_mc_region *region;
>> +	u64 data[8];
>> +	int i;
>> +
>> +	if (index >= vdev->num_regions)
>> +		return -EINVAL;
>> +
>> +	region = &vdev->regions[index];
>> +
>> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>> +		return -EINVAL;
>> +
>> +	if (!region->ioaddr) {
>> +		region->ioaddr = ioremap(region->addr, region->size);
>> +		if (!region->ioaddr)
>> +			return -ENOMEM;
>> +	}
>> +
>> +	if (count != 64 || off != 0)
>> +		return -EINVAL;
>> +
>> +	for (i = 7; i >= 0; i--)
>> +		data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>> +
>> +	if (copy_to_user(buf, data, 64))
>> +		return -EFAULT;
>> +
>> +	return count;
>> +}
>> +
>> +#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
>> +
>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
>> +{
>> +	int i;
>> +	enum mc_cmd_status status;
>> +	unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>> +
>> +	/* Write at command parameter into portal */
>> +	for (i = 7; i >= 1; i--)
>> +		writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>> +
>> +	/* Write command header in the end */
>> +	writeq(cmd_data[0], ioaddr);
>> +
>> +	/* Wait for response before returning to user-space
>> +	 * This can be optimized in future to even prepare response
>> +	 * before returning to user-space and avoid read ioctl.
>> +	 */
>> +	for (;;) {
>> +		u64 header;
>> +		struct mc_cmd_header *resp_hdr;
>> +
>> +		header = cpu_to_le64(readq_relaxed(ioaddr));
>> +
>> +		resp_hdr = (struct mc_cmd_header *)&header;
>> +		status = (enum mc_cmd_status)resp_hdr->status;
>> +		if (status != MC_CMD_STATUS_READY)
>> +			break;
>> +
>> +		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>> +		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>> +		if (timeout_usecs == 0)
>> +			return -ETIMEDOUT;
>> +	}
>> +
>> +	return 0;
>>   }
>>   
>>   static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
>>   				 size_t count, loff_t *ppos)
>>   {
>> -	return -EINVAL;
>> +	struct vfio_fsl_mc_device *vdev = device_data;
>> +	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>> +	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>> +	struct vfio_fsl_mc_region *region;
>> +	u64 data[8];
>> +	int ret;
>> +
>> +	if (index >= vdev->num_regions)
>> +		return -EINVAL;
>> +
>> +	region = &vdev->regions[index];
>> +
>> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>> +		return -EINVAL;
>> +
>> +	if (!region->ioaddr) {
>> +		region->ioaddr = ioremap(region->addr, region->size);
>> +		if (!region->ioaddr)
>> +			return -ENOMEM;
>> +	}
>> +
>> +	if (count != 64 || off != 0)
>> +		return -EINVAL;
>> +
>> +	if (copy_from_user(&data, buf, 64))
>> +		return -EFAULT;
>> +
>> +	ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return count;
>> +
>>   }
>>   
>>   static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> index bbfca8b55f8a..e6804e516c4a 100644
>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>>   	u32			type;
>>   	u64			addr;
>>   	resource_size_t		size;
>> +	void __iomem		*ioaddr;
>>   };
>>   
>>   struct vfio_fsl_mc_device {
>>
> Thanks
> 
> Eric
> 

Thanks,
Diana
Eric Auger Sept. 10, 2020, 8:20 a.m. UTC | #3
Hi Diana,

On 9/7/20 4:34 PM, Diana Craciun OSS wrote:
> Hi Eric,
> 
> On 9/4/2020 11:18 AM, Auger Eric wrote:
>> Hi Diana,
>>
>> On 8/26/20 11:33 AM, Diana Craciun wrote:
>>> The software uses a memory-mapped I/O command interface (MC portals) to
>>> communicate with the MC hardware. This command interface is used to
>>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>>> objects use MSIs, so the command interface needs to be emulated
>>> such that the correct MSI is configured in the hardware (the guest
>>> has the virtual MSIs).
>> What I don't get is all the regions are mmappable too.
>> And this patch does not seem to introduce special handling with respect
>> to MSIs. Please could you clarify?
> 
> The device can be controlled using commands issued towards a firmware.
> Most of the commands can be passthrough, among exceptions is the command
> that configures the interrupts. In a guest the interrupts are emulated
> and for the hardware the numbers in the guest mean nothing. So, in a
> virtual machine scenario, the DPMCP and DPRC regions are emulated in
> qemu such that the command which configures the interrupts will not go
> to hardware with the information set by the guest.
> However there are other scenarios apart from virtual machines like DPDK
> in which the interrupt configuration command is not used. The problem
> might be that the userspace could issue the command because there is no
> restriction in the VFIO, but in that case the worst thing that may
> happen is for the interrupts for the device not to work.
> However it is possible to restrict the command for this scenario as well
> if I change the code and not allow the DPRC region to be mmapable. In
> practice it proved that it might not gain much by direct assigning that
> area. Also the interrupt configuration command was restricted from the
> firmware to be issued only from the DPRC device region to help such a
> scenario.
Yes actually I meant that the region used to configure MSIs should not
be mmappable then?


Thanks

Eric
> 
> 
>>>
>>> This patch is adding read/write support for fsl-mc devices. The mc
>>> commands are emulated by the userspace. The host is just passing
>>> the correct command to the hardware.
>>>
>>> Also the current patch limits userspace to write complete
>>> 64byte command once and read 64byte response by one ioctl.
>>>
>>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
>>> Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
>>> ---
>>>   drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 115 +++++++++++++++++++++-
>>>   drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   1 +
>>>   2 files changed, 114 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> index 73834f488a94..27713aa86878 100644
>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> @@ -12,6 +12,7 @@
>>>   #include <linux/types.h>
>>>   #include <linux/vfio.h>
>>>   #include <linux/fsl/mc.h>
>>> +#include <linux/delay.h>
>>>     #include "vfio_fsl_mc_private.h"
>>>   @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct
>>> vfio_fsl_mc_device *vdev)
>>>           vdev->regions[i].size = resource_size(res);
>>>           vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>>>           vdev->regions[i].type = mc_dev->regions[i].flags &
>>> IORESOURCE_BITS;
>>> +        vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>>> +        if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>>> +            vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>>>       }
>>>         vdev->num_regions = mc_dev->obj_desc.region_count;
>>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct
>>> vfio_fsl_mc_device *vdev)
>>>     static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device
>>> *vdev)
>>>   {
>>> +    int i;
>>> +
>>> +    for (i = 0; i < vdev->num_regions; i++)
>>> +        iounmap(vdev->regions[i].ioaddr);
>>> +
>>>       vdev->num_regions = 0;
>>>       kfree(vdev->regions);
>>>   }
>>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void
>>> *device_data, unsigned int cmd,
>>>   static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>>>                   size_t count, loff_t *ppos)
>>>   {
>>> -    return -EINVAL;
>>> +    struct vfio_fsl_mc_device *vdev = device_data;
>>> +    unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>> +    loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>> +    struct vfio_fsl_mc_region *region;
>>> +    u64 data[8];
>>> +    int i;
>>> +
>>> +    if (index >= vdev->num_regions)
>>> +        return -EINVAL;
>>> +
>>> +    region = &vdev->regions[index];
>>> +
>>> +    if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>>> +        return -EINVAL;
>>> +
>>> +    if (!region->ioaddr) {
>>> +        region->ioaddr = ioremap(region->addr, region->size);
>>> +        if (!region->ioaddr)
>>> +            return -ENOMEM;
>>> +    }
>>> +
>>> +    if (count != 64 || off != 0)
>>> +        return -EINVAL;
>>> +
>>> +    for (i = 7; i >= 0; i--)
>>> +        data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>>> +
>>> +    if (copy_to_user(buf, data, 64))
>>> +        return -EFAULT;
>>> +
>>> +    return count;
>>> +}
>>> +
>>> +#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
>>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
>>> +
>>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t
>>> *cmd_data)
>>> +{
>>> +    int i;
>>> +    enum mc_cmd_status status;
>>> +    unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>>> +
>>> +    /* Write at command parameter into portal */
>>> +    for (i = 7; i >= 1; i--)
>>> +        writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>>> +
>>> +    /* Write command header in the end */
>>> +    writeq(cmd_data[0], ioaddr);
>>> +
>>> +    /* Wait for response before returning to user-space
>>> +     * This can be optimized in future to even prepare response
>>> +     * before returning to user-space and avoid read ioctl.
>>> +     */
>>> +    for (;;) {
>>> +        u64 header;
>>> +        struct mc_cmd_header *resp_hdr;
>>> +
>>> +        header = cpu_to_le64(readq_relaxed(ioaddr));
>>> +
>>> +        resp_hdr = (struct mc_cmd_header *)&header;
>>> +        status = (enum mc_cmd_status)resp_hdr->status;
>>> +        if (status != MC_CMD_STATUS_READY)
>>> +            break;
>>> +
>>> +        udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>>> +        timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>>> +        if (timeout_usecs == 0)
>>> +            return -ETIMEDOUT;
>>> +    }
>>> +
>>> +    return 0;
>>>   }
>>>     static ssize_t vfio_fsl_mc_write(void *device_data, const char
>>> __user *buf,
>>>                    size_t count, loff_t *ppos)
>>>   {
>>> -    return -EINVAL;
>>> +    struct vfio_fsl_mc_device *vdev = device_data;
>>> +    unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>> +    loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>> +    struct vfio_fsl_mc_region *region;
>>> +    u64 data[8];
>>> +    int ret;
>>> +
>>> +    if (index >= vdev->num_regions)
>>> +        return -EINVAL;
>>> +
>>> +    region = &vdev->regions[index];
>>> +
>>> +    if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>>> +        return -EINVAL;
>>> +
>>> +    if (!region->ioaddr) {
>>> +        region->ioaddr = ioremap(region->addr, region->size);
>>> +        if (!region->ioaddr)
>>> +            return -ENOMEM;
>>> +    }
>>> +
>>> +    if (count != 64 || off != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (copy_from_user(&data, buf, 64))
>>> +        return -EFAULT;
>>> +
>>> +    ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    return count;
>>> +
>>>   }
>>>     static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> index bbfca8b55f8a..e6804e516c4a 100644
>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>>>       u32            type;
>>>       u64            addr;
>>>       resource_size_t        size;
>>> +    void __iomem        *ioaddr;
>>>   };
>>>     struct vfio_fsl_mc_device {
>>>
>> Thanks
>>
>> Eric
>>
> 
> Thanks,
> Diana
>
Diana Madalina Craciun Sept. 11, 2020, 9:15 a.m. UTC | #4
Hi Eric,

On 9/10/2020 11:20 AM, Auger Eric wrote:
> Hi Diana,
> 
> On 9/7/20 4:34 PM, Diana Craciun OSS wrote:
>> Hi Eric,
>>
>> On 9/4/2020 11:18 AM, Auger Eric wrote:
>>> Hi Diana,
>>>
>>> On 8/26/20 11:33 AM, Diana Craciun wrote:
>>>> The software uses a memory-mapped I/O command interface (MC portals) to
>>>> communicate with the MC hardware. This command interface is used to
>>>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>>>> objects use MSIs, so the command interface needs to be emulated
>>>> such that the correct MSI is configured in the hardware (the guest
>>>> has the virtual MSIs).
>>> What I don't get is all the regions are mmappable too.
>>> And this patch does not seem to introduce special handling with respect
>>> to MSIs. Please could you clarify?
>>
>> The device can be controlled using commands issued towards a firmware.
>> Most of the commands can be passthrough, among exceptions is the command
>> that configures the interrupts. In a guest the interrupts are emulated
>> and for the hardware the numbers in the guest mean nothing. So, in a
>> virtual machine scenario, the DPMCP and DPRC regions are emulated in
>> qemu such that the command which configures the interrupts will not go
>> to hardware with the information set by the guest.
>> However there are other scenarios apart from virtual machines like DPDK
>> in which the interrupt configuration command is not used. The problem
>> might be that the userspace could issue the command because there is no
>> restriction in the VFIO, but in that case the worst thing that may
>> happen is for the interrupts for the device not to work.
>> However it is possible to restrict the command for this scenario as well
>> if I change the code and not allow the DPRC region to be mmapable. In
>> practice it proved that it might not gain much by direct assigning that
>> area. Also the interrupt configuration command was restricted from the
>> firmware to be issued only from the DPRC device region to help such a
>> scenario.
> Yes actually I meant that the region used to configure MSIs should not
> be mmappable then?
> 
> 

That region is not used only for the MSI configuration. The way it works 
is through commands that are written at a certain memory address. And 
the commands can be different. Applications like DPDK do not use the 
command to configure interrupts, so that is the reason that the region 
is mmapable. But at a second thought, I think that I can restrict the 
DPRC region not to be mmapable. And that is the only region that can be 
used to configure interrupts.

Thanks,

Diana

> Thanks
> 
> Eric
>>
>>
>>>>
>>>> This patch is adding read/write support for fsl-mc devices. The mc
>>>> commands are emulated by the userspace. The host is just passing
>>>> the correct command to the hardware.
>>>>
>>>> Also the current patch limits userspace to write complete
>>>> 64byte command once and read 64byte response by one ioctl.
>>>>
>>>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
>>>> Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
>>>> ---
>>>>    drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 115 +++++++++++++++++++++-
>>>>    drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   1 +
>>>>    2 files changed, 114 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> index 73834f488a94..27713aa86878 100644
>>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> @@ -12,6 +12,7 @@
>>>>    #include <linux/types.h>
>>>>    #include <linux/vfio.h>
>>>>    #include <linux/fsl/mc.h>
>>>> +#include <linux/delay.h>
>>>>      #include "vfio_fsl_mc_private.h"
>>>>    @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct
>>>> vfio_fsl_mc_device *vdev)
>>>>            vdev->regions[i].size = resource_size(res);
>>>>            vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>>>>            vdev->regions[i].type = mc_dev->regions[i].flags &
>>>> IORESOURCE_BITS;
>>>> +        vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>>>> +        if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>>>> +            vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>>>>        }
>>>>          vdev->num_regions = mc_dev->obj_desc.region_count;
>>>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct
>>>> vfio_fsl_mc_device *vdev)
>>>>      static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device
>>>> *vdev)
>>>>    {
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < vdev->num_regions; i++)
>>>> +        iounmap(vdev->regions[i].ioaddr);
>>>> +
>>>>        vdev->num_regions = 0;
>>>>        kfree(vdev->regions);
>>>>    }
>>>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void
>>>> *device_data, unsigned int cmd,
>>>>    static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>>>>                    size_t count, loff_t *ppos)
>>>>    {
>>>> -    return -EINVAL;
>>>> +    struct vfio_fsl_mc_device *vdev = device_data;
>>>> +    unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>>> +    loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>>> +    struct vfio_fsl_mc_region *region;
>>>> +    u64 data[8];
>>>> +    int i;
>>>> +
>>>> +    if (index >= vdev->num_regions)
>>>> +        return -EINVAL;
>>>> +
>>>> +    region = &vdev->regions[index];
>>>> +
>>>> +    if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (!region->ioaddr) {
>>>> +        region->ioaddr = ioremap(region->addr, region->size);
>>>> +        if (!region->ioaddr)
>>>> +            return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    if (count != 64 || off != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    for (i = 7; i >= 0; i--)
>>>> +        data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>>>> +
>>>> +    if (copy_to_user(buf, data, 64))
>>>> +        return -EFAULT;
>>>> +
>>>> +    return count;
>>>> +}
>>>> +
>>>> +#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
>>>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
>>>> +
>>>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t
>>>> *cmd_data)
>>>> +{
>>>> +    int i;
>>>> +    enum mc_cmd_status status;
>>>> +    unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>>>> +
>>>> +    /* Write at command parameter into portal */
>>>> +    for (i = 7; i >= 1; i--)
>>>> +        writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>>>> +
>>>> +    /* Write command header in the end */
>>>> +    writeq(cmd_data[0], ioaddr);
>>>> +
>>>> +    /* Wait for response before returning to user-space
>>>> +     * This can be optimized in future to even prepare response
>>>> +     * before returning to user-space and avoid read ioctl.
>>>> +     */
>>>> +    for (;;) {
>>>> +        u64 header;
>>>> +        struct mc_cmd_header *resp_hdr;
>>>> +
>>>> +        header = cpu_to_le64(readq_relaxed(ioaddr));
>>>> +
>>>> +        resp_hdr = (struct mc_cmd_header *)&header;
>>>> +        status = (enum mc_cmd_status)resp_hdr->status;
>>>> +        if (status != MC_CMD_STATUS_READY)
>>>> +            break;
>>>> +
>>>> +        udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>>>> +        timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>>>> +        if (timeout_usecs == 0)
>>>> +            return -ETIMEDOUT;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>>    }
>>>>      static ssize_t vfio_fsl_mc_write(void *device_data, const char
>>>> __user *buf,
>>>>                     size_t count, loff_t *ppos)
>>>>    {
>>>> -    return -EINVAL;
>>>> +    struct vfio_fsl_mc_device *vdev = device_data;
>>>> +    unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>>> +    loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>>> +    struct vfio_fsl_mc_region *region;
>>>> +    u64 data[8];
>>>> +    int ret;
>>>> +
>>>> +    if (index >= vdev->num_regions)
>>>> +        return -EINVAL;
>>>> +
>>>> +    region = &vdev->regions[index];
>>>> +
>>>> +    if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (!region->ioaddr) {
>>>> +        region->ioaddr = ioremap(region->addr, region->size);
>>>> +        if (!region->ioaddr)
>>>> +            return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    if (count != 64 || off != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (copy_from_user(&data, buf, 64))
>>>> +        return -EFAULT;
>>>> +
>>>> +    ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>>>> +    if (ret)
>>>> +        return ret;
>>>> +
>>>> +    return count;
>>>> +
>>>>    }
>>>>      static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> index bbfca8b55f8a..e6804e516c4a 100644
>>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>>>>        u32            type;
>>>>        u64            addr;
>>>>        resource_size_t        size;
>>>> +    void __iomem        *ioaddr;
>>>>    };
>>>>      struct vfio_fsl_mc_device {
>>>>
>>> Thanks
>>>
>>> Eric
>>>
>>
>> Thanks,
>> Diana
>>
>
diff mbox series

Patch

diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 73834f488a94..27713aa86878 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -12,6 +12,7 @@ 
 #include <linux/types.h>
 #include <linux/vfio.h>
 #include <linux/fsl/mc.h>
+#include <linux/delay.h>
 
 #include "vfio_fsl_mc_private.h"
 
@@ -106,6 +107,9 @@  static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
 		vdev->regions[i].size = resource_size(res);
 		vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
 		vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
+		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+		if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
+			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
 	}
 
 	vdev->num_regions = mc_dev->obj_desc.region_count;
@@ -114,6 +118,11 @@  static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
 
 static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
 {
+	int i;
+
+	for (i = 0; i < vdev->num_regions; i++)
+		iounmap(vdev->regions[i].ioaddr);
+
 	vdev->num_regions = 0;
 	kfree(vdev->regions);
 }
@@ -311,13 +320,115 @@  static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
 static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	return -EINVAL;
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct vfio_fsl_mc_region *region;
+	u64 data[8];
+	int i;
+
+	if (index >= vdev->num_regions)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	for (i = 7; i >= 0; i--)
+		data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
+
+	if (copy_to_user(buf, data, 64))
+		return -EFAULT;
+
+	return count;
+}
+
+#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
+#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
+
+static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+	int i;
+	enum mc_cmd_status status;
+	unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
+
+	/* Write at command parameter into portal */
+	for (i = 7; i >= 1; i--)
+		writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
+
+	/* Write command header in the end */
+	writeq(cmd_data[0], ioaddr);
+
+	/* Wait for response before returning to user-space
+	 * This can be optimized in future to even prepare response
+	 * before returning to user-space and avoid read ioctl.
+	 */
+	for (;;) {
+		u64 header;
+		struct mc_cmd_header *resp_hdr;
+
+		header = cpu_to_le64(readq_relaxed(ioaddr));
+
+		resp_hdr = (struct mc_cmd_header *)&header;
+		status = (enum mc_cmd_status)resp_hdr->status;
+		if (status != MC_CMD_STATUS_READY)
+			break;
+
+		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
+		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
+		if (timeout_usecs == 0)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
 }
 
 static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
-	return -EINVAL;
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct vfio_fsl_mc_region *region;
+	u64 data[8];
+	int ret;
+
+	if (index >= vdev->num_regions)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 64))
+		return -EFAULT;
+
+	ret = vfio_fsl_mc_send_command(region->ioaddr, data);
+	if (ret)
+		return ret;
+
+	return count;
+
 }
 
 static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index bbfca8b55f8a..e6804e516c4a 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -32,6 +32,7 @@  struct vfio_fsl_mc_region {
 	u32			type;
 	u64			addr;
 	resource_size_t		size;
+	void __iomem		*ioaddr;
 };
 
 struct vfio_fsl_mc_device {