diff mbox series

[RFC,2/2] KVM: add initial support for ioregionfd blocking read/write operations

Message ID a13b23ca540a8846891895462d2fb139ec597237.1609231374.git.eafanasova@gmail.com (mailing list archive)
State New, archived
Headers show
Series Introduce MMIO/PIO dispatch file descriptors (ioregionfd) | expand

Commit Message

Elena Afanasova Dec. 29, 2020, 10:02 a.m. UTC
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
---
 virt/kvm/ioregion.c | 157 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

Comments

Stefan Hajnoczi Dec. 29, 2020, noon UTC | #1
On Tue, Dec 29, 2020 at 01:02:44PM +0300, Elena Afanasova wrote:
> Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
> ---
>  virt/kvm/ioregion.c | 157 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 157 insertions(+)
> 
> diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> index a200c3761343..8523f4126337 100644
> --- a/virt/kvm/ioregion.c
> +++ b/virt/kvm/ioregion.c
> @@ -4,6 +4,33 @@
>  #include <kvm/iodev.h>
>  #include "eventfd.h"
>  
> +/* Wire protocol */
> +struct ioregionfd_cmd {
> +	__u32 info;
> +	__u32 padding;
> +	__u64 user_data;
> +	__u64 offset;
> +	__u64 data;
> +};
> +
> +struct ioregionfd_resp {
> +	__u64 data;
> +	__u8 pad[24];
> +};
> +
> +#define IOREGIONFD_CMD_READ    0
> +#define IOREGIONFD_CMD_WRITE   1
> +
> +#define IOREGIONFD_SIZE_8BIT   0
> +#define IOREGIONFD_SIZE_16BIT  1
> +#define IOREGIONFD_SIZE_32BIT  2
> +#define IOREGIONFD_SIZE_64BIT  3
> +
> +#define IOREGIONFD_SIZE_OFFSET 4
> +#define IOREGIONFD_RESP_OFFSET 6
> +#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET)
> +#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET)

These belong in the uapi header so userspace also has struct
ioregionfd_cmd, struct ioregionfd_resp, etc.

> +
>  void
>  kvm_ioregionfd_init(struct kvm *kvm)
>  {
> @@ -38,10 +65,100 @@ ioregion_release(struct ioregion *p)
>  	kfree(p);
>  }
>  
> +static bool
> +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt, bool resp,
> +	 u64 user_data, const void *val)
> +{
> +	u64 size = 0;
> +
> +	switch (len) {
> +	case 1:
> +		size = IOREGIONFD_SIZE_8BIT;
> +		*((u8 *)&cmd->data) = val ? *(u8 *)val : 0;
> +		break;
> +	case 2:
> +		size = IOREGIONFD_SIZE_16BIT;
> +		*((u16 *)&cmd->data) = val ? *(u16 *)val : 0;
> +		break;
> +	case 4:
> +		size = IOREGIONFD_SIZE_32BIT;
> +		*((u32 *)&cmd->data) = val ? *(u32 *)val : 0;
> +		break;
> +	case 8:
> +		size = IOREGIONFD_SIZE_64BIT;
> +		*((u64 *)&cmd->data) = val ? *(u64 *)val : 0;
> +		break;
> +	default:
> +		return false;
>

The assignments and casts can be replaced with a single memcpy after the
switch statement. This is also how KVM_EXIT_MMIO and Coalesced MMIO do
it:

  memcpy(cmd->data, val, len);

However, we need to make sure that cmd has been zeroed so that kernel
memory is not accidentally exposed to userspace.

 +	}
> +	cmd->user_data = user_data;
> +	cmd->offset = offset;
> +	cmd->info |= opt;
> +	cmd->info |= IOREGIONFD_SIZE(size);
> +	cmd->info |= IOREGIONFD_RESP(resp);
> +
> +	return true;
> +}
> +
>  static int
>  ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
>  	      int len, void *val)
>  {
> +	struct ioregion *p = to_ioregion(this);
> +	struct ioregionfd_cmd *cmd;
> +	struct ioregionfd_resp *resp;
> +	size_t buf_size;
> +	void *buf;
> +	int ret = 0;
> +
> +	if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags & O_NONBLOCK))
> +		return -EINVAL;

Another CPU could change file descriptor flags while we are running.
Therefore it might be simplest to handle kernel_write() and
kernel_read() -EAGAIN return values instead of trying to check.

> +	if ((addr + len - 1) > (p->paddr + p->size - 1))
> +		return -EINVAL;
> +
> +	buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp));
> +	buf = kzalloc(buf_size, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +	cmd = (struct ioregionfd_cmd *)buf;
> +	resp = (struct ioregionfd_resp *)buf;

I think they are small enough to declare them on the stack:

  union {
      struct ioregionfd_cmd cmd;
      struct ioregionfd_resp resp;
  } buf;

  memset(&buf, 0, sizeof(buf));

> +	if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
> +		      1, p->user_data, NULL)) {
> +		kfree(buf);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0);
> +	if (ret != sizeof(*cmd)) {
> +		kfree(buf);
> +		return (ret < 0) ? ret : -EIO;
> +	}
> +	memset(buf, 0, buf_size);
> +	ret = kernel_read(p->rf, resp, sizeof(*resp), 0);
> +	if (ret != sizeof(*resp)) {
> +		kfree(buf);
> +		return (ret < 0) ? ret : -EIO;
> +	}
> +
> +	switch (len) {
> +	case 1:
> +		*(u8 *)val = (u8)resp->data;
> +		break;
> +	case 2:
> +		*(u16 *)val = (u16)resp->data;
> +		break;
> +	case 4:
> +		*(u32 *)val = (u32)resp->data;
> +		break;
> +	case 8:
> +		*(u64 *)val = (u64)resp->data;
> +		break;
> +	default:
> +		break;
> +	}

This looks inconsistent. cmd->data is treated as a packed u8/u16/u32/864
whereas resp->data is treated as u64?

I was expecting memcpy(val, &resp->data, len) here not the u64 ->
u8/u16/u32/u64 conversion.
Elena Afanasova Dec. 30, 2020, 12:24 p.m. UTC | #2
On Tue, 2020-12-29 at 12:00 +0000, Stefan Hajnoczi wrote:
> On Tue, Dec 29, 2020 at 01:02:44PM +0300, Elena Afanasova wrote:
> > Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
> > ---
> >  virt/kvm/ioregion.c | 157
> > ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 157 insertions(+)
> > 
> > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> > index a200c3761343..8523f4126337 100644
> > --- a/virt/kvm/ioregion.c
> > +++ b/virt/kvm/ioregion.c
> > @@ -4,6 +4,33 @@
> >  #include <kvm/iodev.h>
> >  #include "eventfd.h"
> >  
> > +/* Wire protocol */
> > +struct ioregionfd_cmd {
> > +	__u32 info;
> > +	__u32 padding;
> > +	__u64 user_data;
> > +	__u64 offset;
> > +	__u64 data;
> > +};
> > +
> > +struct ioregionfd_resp {
> > +	__u64 data;
> > +	__u8 pad[24];
> > +};
> > +
> > +#define IOREGIONFD_CMD_READ    0
> > +#define IOREGIONFD_CMD_WRITE   1
> > +
> > +#define IOREGIONFD_SIZE_8BIT   0
> > +#define IOREGIONFD_SIZE_16BIT  1
> > +#define IOREGIONFD_SIZE_32BIT  2
> > +#define IOREGIONFD_SIZE_64BIT  3
> > +
> > +#define IOREGIONFD_SIZE_OFFSET 4
> > +#define IOREGIONFD_RESP_OFFSET 6
> > +#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET)
> > +#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET)
> 
> These belong in the uapi header so userspace also has struct
> ioregionfd_cmd, struct ioregionfd_resp, etc.
> 
I'll move the wire protocol to a separate uapi header

> > +
> >  void
> >  kvm_ioregionfd_init(struct kvm *kvm)
> >  {
> > @@ -38,10 +65,100 @@ ioregion_release(struct ioregion *p)
> >  	kfree(p);
> >  }
> >  
> > +static bool
> > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt,
> > bool resp,
> > +	 u64 user_data, const void *val)
> > +{
> > +	u64 size = 0;
> > +
> > +	switch (len) {
> > +	case 1:
> > +		size = IOREGIONFD_SIZE_8BIT;
> > +		*((u8 *)&cmd->data) = val ? *(u8 *)val : 0;
> > +		break;
> > +	case 2:
> > +		size = IOREGIONFD_SIZE_16BIT;
> > +		*((u16 *)&cmd->data) = val ? *(u16 *)val : 0;
> > +		break;
> > +	case 4:
> > +		size = IOREGIONFD_SIZE_32BIT;
> > +		*((u32 *)&cmd->data) = val ? *(u32 *)val : 0;
> > +		break;
> > +	case 8:
> > +		size = IOREGIONFD_SIZE_64BIT;
> > +		*((u64 *)&cmd->data) = val ? *(u64 *)val : 0;
> > +		break;
> > +	default:
> > +		return false;
> > 
> 
> The assignments and casts can be replaced with a single memcpy after
> the
> switch statement. This is also how KVM_EXIT_MMIO and Coalesced MMIO
> do
> it:
> 
>   memcpy(cmd->data, val, len);
> 
Thanks for pointing it out

> However, we need to make sure that cmd has been zeroed so that kernel
> memory is not accidentally exposed to userspace.
> 
>  +	}
> > +	cmd->user_data = user_data;
> > +	cmd->offset = offset;
> > +	cmd->info |= opt;
> > +	cmd->info |= IOREGIONFD_SIZE(size);
> > +	cmd->info |= IOREGIONFD_RESP(resp);
> > +
> > +	return true;
> > +}
> > +
> >  static int
> >  ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> > gpa_t addr,
> >  	      int len, void *val)
> >  {
> > +	struct ioregion *p = to_ioregion(this);
> > +	struct ioregionfd_cmd *cmd;
> > +	struct ioregionfd_resp *resp;
> > +	size_t buf_size;
> > +	void *buf;
> > +	int ret = 0;
> > +
> > +	if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags &
> > O_NONBLOCK))
> > +		return -EINVAL;
> 
> Another CPU could change file descriptor flags while we are running.
> Therefore it might be simplest to handle kernel_write() and
> kernel_read() -EAGAIN return values instead of trying to check.
> 
Ok, I'll fix this

> > +	if ((addr + len - 1) > (p->paddr + p->size - 1))
> > +		return -EINVAL;
> > +
> > +	buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp));
> > +	buf = kzalloc(buf_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +	cmd = (struct ioregionfd_cmd *)buf;
> > +	resp = (struct ioregionfd_resp *)buf;
> 
> I think they are small enough to declare them on the stack:
> 
>   union {
>       struct ioregionfd_cmd cmd;
>       struct ioregionfd_resp resp;
>   } buf;
> 
>   memset(&buf, 0, sizeof(buf));
> 
Ok

> > +	if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
> > +		      1, p->user_data, NULL)) {
> > +		kfree(buf);
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0);
> > +	if (ret != sizeof(*cmd)) {
> > +		kfree(buf);
> > +		return (ret < 0) ? ret : -EIO;
> > +	}
> > +	memset(buf, 0, buf_size);
> > +	ret = kernel_read(p->rf, resp, sizeof(*resp), 0);
> > +	if (ret != sizeof(*resp)) {
> > +		kfree(buf);
> > +		return (ret < 0) ? ret : -EIO;
> > +	}
> > +
> > +	switch (len) {
> > +	case 1:
> > +		*(u8 *)val = (u8)resp->data;
> > +		break;
> > +	case 2:
> > +		*(u16 *)val = (u16)resp->data;
> > +		break;
> > +	case 4:
> > +		*(u32 *)val = (u32)resp->data;
> > +		break;
> > +	case 8:
> > +		*(u64 *)val = (u64)resp->data;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> 
> This looks inconsistent. cmd->data is treated as a packed
> u8/u16/u32/864
> whereas resp->data is treated as u64?
> 
> I was expecting memcpy(val, &resp->data, len) here not the u64 ->
> u8/u16/u32/u64 conversion.
I'll fix this, thanks!
Jason Wang Dec. 31, 2020, 3:46 a.m. UTC | #3
On 2020/12/29 下午6:02, Elena Afanasova wrote:
> Signed-off-by: Elena Afanasova<eafanasova@gmail.com>
> ---
>   virt/kvm/ioregion.c | 157 ++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 157 insertions(+)
>
> diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> index a200c3761343..8523f4126337 100644
> --- a/virt/kvm/ioregion.c
> +++ b/virt/kvm/ioregion.c
> @@ -4,6 +4,33 @@
>   #include <kvm/iodev.h>
>   #include "eventfd.h"
>   
> +/* Wire protocol */
> +struct ioregionfd_cmd {
> +	__u32 info;
> +	__u32 padding;
> +	__u64 user_data;
> +	__u64 offset;
> +	__u64 data;
> +};
> +


I wonder do we need a seq in the protocol. It might be useful if we 
allow a pair of file descriptors to be used for multiple different ranges.

Thanks


> +struct ioregionfd_resp {
> +	__u64 data;
> +	__u8 pad[24];
> +};
Elena Afanasova Jan. 3, 2021, 8:37 p.m. UTC | #4
On Thu, 2020-12-31 at 11:46 +0800, Jason Wang wrote:
> On 2020/12/29 下午6:02, Elena Afanasova wrote:
> > Signed-off-by: Elena Afanasova<eafanasova@gmail.com>
> > ---
> >   virt/kvm/ioregion.c | 157
> > ++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 157 insertions(+)
> > 
> > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> > index a200c3761343..8523f4126337 100644
> > --- a/virt/kvm/ioregion.c
> > +++ b/virt/kvm/ioregion.c
> > @@ -4,6 +4,33 @@
> >   #include <kvm/iodev.h>
> >   #include "eventfd.h"
> >   
> > +/* Wire protocol */
> > +struct ioregionfd_cmd {
> > +	__u32 info;
> > +	__u32 padding;
> > +	__u64 user_data;
> > +	__u64 offset;
> > +	__u64 data;
> > +};
> > +
> 
> I wonder do we need a seq in the protocol. It might be useful if we 
> allow a pair of file descriptors to be used for multiple different
> ranges.
> 
I think it might be helpful in the case of out-of-order requests. 
In the case of in order requests seq field seems not to be necessary
since there will be cmds/replies serialization. I’ll include the
synchronization code in a RFC v2 series.

> Thanks
> 
> 
> > +struct ioregionfd_resp {
> > +	__u64 data;
> > +	__u8 pad[24];
> > +};
Jason Wang Jan. 4, 2021, 5:37 a.m. UTC | #5
On 2021/1/4 上午4:37, Elena Afanasova wrote:
> On Thu, 2020-12-31 at 11:46 +0800, Jason Wang wrote:
>> On 2020/12/29 下午6:02, Elena Afanasova wrote:
>>> Signed-off-by: Elena Afanasova<eafanasova@gmail.com>
>>> ---
>>>    virt/kvm/ioregion.c | 157
>>> ++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 157 insertions(+)
>>>
>>> diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
>>> index a200c3761343..8523f4126337 100644
>>> --- a/virt/kvm/ioregion.c
>>> +++ b/virt/kvm/ioregion.c
>>> @@ -4,6 +4,33 @@
>>>    #include <kvm/iodev.h>
>>>    #include "eventfd.h"
>>>    
>>> +/* Wire protocol */
>>> +struct ioregionfd_cmd {
>>> +	__u32 info;
>>> +	__u32 padding;
>>> +	__u64 user_data;
>>> +	__u64 offset;
>>> +	__u64 data;
>>> +};
>>> +
>> I wonder do we need a seq in the protocol. It might be useful if we
>> allow a pair of file descriptors to be used for multiple different
>> ranges.
>>
> I think it might be helpful in the case of out-of-order requests.
> In the case of in order requests seq field seems not to be necessary
> since there will be cmds/replies serialization. I’ll include the
> synchronization code in a RFC v2 series.


See my reply to V1. It might be helpful for the case of using single 
ioregionfd for multiple ranges.

Thanks


>
>> Thanks
>>
>>
>>> +struct ioregionfd_resp {
>>> +	__u64 data;
>>> +	__u8 pad[24];
>>> +};
Elena Afanasova Jan. 5, 2021, 12:06 a.m. UTC | #6
On Mon, 2021-01-04 at 13:37 +0800, Jason Wang wrote:
> On 2021/1/4 上午4:37, Elena Afanasova wrote:
> > On Thu, 2020-12-31 at 11:46 +0800, Jason Wang wrote:
> > > On 2020/12/29 下午6:02, Elena Afanasova wrote:
> > > > Signed-off-by: Elena Afanasova<eafanasova@gmail.com>
> > > > ---
> > > >    virt/kvm/ioregion.c | 157
> > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > >    1 file changed, 157 insertions(+)
> > > > 
> > > > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> > > > index a200c3761343..8523f4126337 100644
> > > > --- a/virt/kvm/ioregion.c
> > > > +++ b/virt/kvm/ioregion.c
> > > > @@ -4,6 +4,33 @@
> > > >    #include <kvm/iodev.h>
> > > >    #include "eventfd.h"
> > > >    
> > > > +/* Wire protocol */
> > > > +struct ioregionfd_cmd {
> > > > +	__u32 info;
> > > > +	__u32 padding;
> > > > +	__u64 user_data;
> > > > +	__u64 offset;
> > > > +	__u64 data;
> > > > +};
> > > > +
> > > I wonder do we need a seq in the protocol. It might be useful if
> > > we
> > > allow a pair of file descriptors to be used for multiple
> > > different
> > > ranges.
> > > 
> > I think it might be helpful in the case of out-of-order requests.
> > In the case of in order requests seq field seems not to be
> > necessary
> > since there will be cmds/replies serialization. I’ll include the
> > synchronization code in a RFC v2 series.
> 
> See my reply to V1. It might be helpful for the case of using single 
> ioregionfd for multiple ranges.
> 
Ok, thank you!

> Thanks
> 
> 
> > > Thanks
> > > 
> > > 
> > > > +struct ioregionfd_resp {
> > > > +	__u64 data;
> > > > +	__u8 pad[24];
> > > > +};
diff mbox series

Patch

diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
index a200c3761343..8523f4126337 100644
--- a/virt/kvm/ioregion.c
+++ b/virt/kvm/ioregion.c
@@ -4,6 +4,33 @@ 
 #include <kvm/iodev.h>
 #include "eventfd.h"
 
+/* Wire protocol */
+struct ioregionfd_cmd {
+	__u32 info;
+	__u32 padding;
+	__u64 user_data;
+	__u64 offset;
+	__u64 data;
+};
+
+struct ioregionfd_resp {
+	__u64 data;
+	__u8 pad[24];
+};
+
+#define IOREGIONFD_CMD_READ    0
+#define IOREGIONFD_CMD_WRITE   1
+
+#define IOREGIONFD_SIZE_8BIT   0
+#define IOREGIONFD_SIZE_16BIT  1
+#define IOREGIONFD_SIZE_32BIT  2
+#define IOREGIONFD_SIZE_64BIT  3
+
+#define IOREGIONFD_SIZE_OFFSET 4
+#define IOREGIONFD_RESP_OFFSET 6
+#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET)
+#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET)
+
 void
 kvm_ioregionfd_init(struct kvm *kvm)
 {
@@ -38,10 +65,100 @@  ioregion_release(struct ioregion *p)
 	kfree(p);
 }
 
+static bool
+pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt, bool resp,
+	 u64 user_data, const void *val)
+{
+	u64 size = 0;
+
+	switch (len) {
+	case 1:
+		size = IOREGIONFD_SIZE_8BIT;
+		*((u8 *)&cmd->data) = val ? *(u8 *)val : 0;
+		break;
+	case 2:
+		size = IOREGIONFD_SIZE_16BIT;
+		*((u16 *)&cmd->data) = val ? *(u16 *)val : 0;
+		break;
+	case 4:
+		size = IOREGIONFD_SIZE_32BIT;
+		*((u32 *)&cmd->data) = val ? *(u32 *)val : 0;
+		break;
+	case 8:
+		size = IOREGIONFD_SIZE_64BIT;
+		*((u64 *)&cmd->data) = val ? *(u64 *)val : 0;
+		break;
+	default:
+		return false;
+	}
+	cmd->user_data = user_data;
+	cmd->offset = offset;
+	cmd->info |= opt;
+	cmd->info |= IOREGIONFD_SIZE(size);
+	cmd->info |= IOREGIONFD_RESP(resp);
+
+	return true;
+}
+
 static int
 ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	      int len, void *val)
 {
+	struct ioregion *p = to_ioregion(this);
+	struct ioregionfd_cmd *cmd;
+	struct ioregionfd_resp *resp;
+	size_t buf_size;
+	void *buf;
+	int ret = 0;
+
+	if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags & O_NONBLOCK))
+		return -EINVAL;
+	if ((addr + len - 1) > (p->paddr + p->size - 1))
+		return -EINVAL;
+
+	buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp));
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	cmd = (struct ioregionfd_cmd *)buf;
+	resp = (struct ioregionfd_resp *)buf;
+	if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
+		      1, p->user_data, NULL)) {
+		kfree(buf);
+		return -EOPNOTSUPP;
+	}
+
+	ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0);
+	if (ret != sizeof(*cmd)) {
+		kfree(buf);
+		return (ret < 0) ? ret : -EIO;
+	}
+	memset(buf, 0, buf_size);
+	ret = kernel_read(p->rf, resp, sizeof(*resp), 0);
+	if (ret != sizeof(*resp)) {
+		kfree(buf);
+		return (ret < 0) ? ret : -EIO;
+	}
+
+	switch (len) {
+	case 1:
+		*(u8 *)val = (u8)resp->data;
+		break;
+	case 2:
+		*(u16 *)val = (u16)resp->data;
+		break;
+	case 4:
+		*(u32 *)val = (u32)resp->data;
+		break;
+	case 8:
+		*(u64 *)val = (u64)resp->data;
+		break;
+	default:
+		break;
+	}
+
+	kfree(buf);
+
 	return 0;
 }
 
@@ -49,6 +166,46 @@  static int
 ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 		int len, const void *val)
 {
+	struct ioregion *p = to_ioregion(this);
+	struct ioregionfd_cmd *cmd;
+	struct ioregionfd_resp *resp;
+	size_t buf_size = 0;
+	void *buf;
+	int ret = 0;
+
+	if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags & O_NONBLOCK))
+		return -EINVAL;
+	if ((addr + len - 1) > (p->paddr + p->size - 1))
+		return -EINVAL;
+
+	buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp));
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	cmd = (struct ioregionfd_cmd *)buf;
+	if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_WRITE,
+		      p->posted_writes ? 0 : 1, p->user_data, val)) {
+		kfree(buf);
+		return -EOPNOTSUPP;
+	}
+
+	ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0);
+	if (ret != sizeof(*cmd)) {
+		kfree(buf);
+		return (ret < 0) ? ret : -EIO;
+	}
+
+	if (!p->posted_writes) {
+		memset(buf, 0, buf_size);
+		resp = (struct ioregionfd_resp *)buf;
+		ret = kernel_read(p->rf, resp, sizeof(*resp), 0);
+		if (ret != sizeof(*resp)) {
+			kfree(buf);
+			return (ret < 0) ? ret : -EIO;
+		}
+	}
+	kfree(buf);
+
 	return 0;
 }