diff mbox

[v11,2/3] Differentiate IO/mem resources tracked by ioreq server

Message ID 1453432840-5319-3-git-send-email-yu.c.zhang@linux.intel.com
State New, archived
Headers show

Commit Message

Yu Zhang Jan. 22, 2016, 3:20 a.m. UTC
Currently in ioreq server, guest write-protected ram pages are
tracked in the same rangeset with device mmio resources. Yet
unlike device mmio, which can be in big chunks, the guest write-
protected pages may be discrete ranges with 4K bytes each. This
patch uses a seperate rangeset for the guest ram pages.

To differentiate the ioreq type between the write-protected memory
ranges and the mmio ranges when selecting an ioreq server, the p2m
type is retrieved by calling get_page_from_gfn(). And we do not
need to worry about the p2m type change during the ioreq selection
process.

Note: Previously, a new hypercall or subop was suggested to map
write-protected pages into ioreq server. However, it turned out
handler of this new hypercall would be almost the same with the
existing pair - HVMOP_[un]map_io_range_to_ioreq_server, and there's
already a type parameter in this hypercall. So no new hypercall
defined, only a new type is introduced.

Acked-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
 tools/libxc/include/xenctrl.h    | 31 ++++++++++++++++++++
 tools/libxc/xc_domain.c          | 61 ++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c           | 27 +++++++++++++++---
 xen/include/asm-x86/hvm/domain.h |  2 +-
 xen/include/public/hvm/hvm_op.h  |  1 +
 5 files changed, 117 insertions(+), 5 deletions(-)

Comments

Jan Beulich Jan. 22, 2016, 11:43 a.m. UTC | #1
>>> On 22.01.16 at 04:20, <yu.c.zhang@linux.intel.com> wrote:
> @@ -2601,6 +2605,16 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>          type = (p->type == IOREQ_TYPE_PIO) ?
>                  HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
>          addr = p->addr;
> +        if ( type == HVMOP_IO_RANGE_MEMORY )
> +        {
> +             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
> +                                          &p2mt, P2M_UNSHARE);

It seems to me like I had asked before: Why P2M_UNSHARE instead
of just P2M_QUERY? (This could surely be fixed up while committing,
the more that I've already done some cleanup here, but I'd like to
understand this before it goes in.)

> +             if ( p2mt == p2m_mmio_write_dm )
> +                 type = HVMOP_IO_RANGE_WP_MEM;
> +
> +             if ( ram_page )
> +                 put_page(ram_page);
> +        }
>      }
>  
>      list_for_each_entry ( s,
> @@ -2642,6 +2656,11 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>              }
>  
>              break;
> +        case HVMOP_IO_RANGE_WP_MEM:
> +            if ( rangeset_contains_singleton(r, PFN_DOWN(addr)) )
> +                return s;

Considering you've got p2m_mmio_write_dm above - can this
validly return false here?

Jan
Yu Zhang Jan. 26, 2016, 7:59 a.m. UTC | #2
On 1/22/2016 7:43 PM, Jan Beulich wrote:
>>>> On 22.01.16 at 04:20, <yu.c.zhang@linux.intel.com> wrote:
>> @@ -2601,6 +2605,16 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>>           type = (p->type == IOREQ_TYPE_PIO) ?
>>                   HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
>>           addr = p->addr;
>> +        if ( type == HVMOP_IO_RANGE_MEMORY )
>> +        {
>> +             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
>> +                                          &p2mt, P2M_UNSHARE);
>
> It seems to me like I had asked before: Why P2M_UNSHARE instead
> of just P2M_QUERY? (This could surely be fixed up while committing,
> the more that I've already done some cleanup here, but I'd like to
> understand this before it goes in.)
>
Hah, sorry for my bad memory. :)
I did not found P2M_QUERY; only P2M_UNSHARE and P2M_ALLOC are
defined. But after reading the code in ept_get_entry(), I guess the
P2M_UNSHARE is not accurate, maybe I should use 0 here for the
p2m_query_t parameter in get_page_from_gfn()?

>> +             if ( p2mt == p2m_mmio_write_dm )
>> +                 type = HVMOP_IO_RANGE_WP_MEM;
>> +
>> +             if ( ram_page )
>> +                 put_page(ram_page);
>> +        }
>>       }
>>
>>       list_for_each_entry ( s,
>> @@ -2642,6 +2656,11 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>>               }
>>
>>               break;
>> +        case HVMOP_IO_RANGE_WP_MEM:
>> +            if ( rangeset_contains_singleton(r, PFN_DOWN(addr)) )
>> +                return s;
>
> Considering you've got p2m_mmio_write_dm above - can this
> validly return false here?

Well, if we have multiple ioreq servers defined, it will...
Currently, this p2m type is only used in XenGT, which has only one
ioreq server other than qemu for the vGPU. But suppose there will
be more devices using this type and more ioreq servers introduced
for them, it can return false.
>
> Jan
>
>
B.R.
Yu
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>
Jan Beulich Jan. 26, 2016, 11:24 a.m. UTC | #3
>>> On 26.01.16 at 08:59, <yu.c.zhang@linux.intel.com> wrote:

> 
> On 1/22/2016 7:43 PM, Jan Beulich wrote:
>>>>> On 22.01.16 at 04:20, <yu.c.zhang@linux.intel.com> wrote:
>>> @@ -2601,6 +2605,16 @@ struct hvm_ioreq_server 
> *hvm_select_ioreq_server(struct domain *d,
>>>           type = (p->type == IOREQ_TYPE_PIO) ?
>>>                   HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
>>>           addr = p->addr;
>>> +        if ( type == HVMOP_IO_RANGE_MEMORY )
>>> +        {
>>> +             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
>>> +                                          &p2mt, P2M_UNSHARE);
>>
>> It seems to me like I had asked before: Why P2M_UNSHARE instead
>> of just P2M_QUERY? (This could surely be fixed up while committing,
>> the more that I've already done some cleanup here, but I'd like to
>> understand this before it goes in.)
>>
> Hah, sorry for my bad memory. :)
> I did not found P2M_QUERY; only P2M_UNSHARE and P2M_ALLOC are
> defined. But after reading the code in ept_get_entry(), I guess the
> P2M_UNSHARE is not accurate, maybe I should use 0 here for the
> p2m_query_t parameter in get_page_from_gfn()?

Ah, sorry for the misnamed suggestion. I'm not sure whether using
zero here actually matches your needs; P2M_UNSHARE though
seems odd in any case, so at least switching to P2M_ALLOC (to
populate PoD pages) would seem to be necessary.

>>> @@ -2642,6 +2656,11 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>>>               }
>>>
>>>               break;
>>> +        case HVMOP_IO_RANGE_WP_MEM:
>>> +            if ( rangeset_contains_singleton(r, PFN_DOWN(addr)) )
>>> +                return s;
>>
>> Considering you've got p2m_mmio_write_dm above - can this
>> validly return false here?
> 
> Well, if we have multiple ioreq servers defined, it will...

Ah, right. That's fine then.

Jan
Yu Zhang Jan. 27, 2016, 7:02 a.m. UTC | #4
On 1/26/2016 7:24 PM, Jan Beulich wrote:
>>>> On 26.01.16 at 08:59, <yu.c.zhang@linux.intel.com> wrote:
>
>>
>> On 1/22/2016 7:43 PM, Jan Beulich wrote:
>>>>>> On 22.01.16 at 04:20, <yu.c.zhang@linux.intel.com> wrote:
>>>> @@ -2601,6 +2605,16 @@ struct hvm_ioreq_server
>> *hvm_select_ioreq_server(struct domain *d,
>>>>            type = (p->type == IOREQ_TYPE_PIO) ?
>>>>                    HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
>>>>            addr = p->addr;
>>>> +        if ( type == HVMOP_IO_RANGE_MEMORY )
>>>> +        {
>>>> +             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
>>>> +                                          &p2mt, P2M_UNSHARE);
>>>
>>> It seems to me like I had asked before: Why P2M_UNSHARE instead
>>> of just P2M_QUERY? (This could surely be fixed up while committing,
>>> the more that I've already done some cleanup here, but I'd like to
>>> understand this before it goes in.)
>>>
>> Hah, sorry for my bad memory. :)
>> I did not found P2M_QUERY; only P2M_UNSHARE and P2M_ALLOC are
>> defined. But after reading the code in ept_get_entry(), I guess the
>> P2M_UNSHARE is not accurate, maybe I should use 0 here for the
>> p2m_query_t parameter in get_page_from_gfn()?
>
> Ah, sorry for the misnamed suggestion. I'm not sure whether using
> zero here actually matches your needs; P2M_UNSHARE though
> seems odd in any case, so at least switching to P2M_ALLOC (to
> populate PoD pages) would seem to be necessary.
>

Thanks, Jan.  :)
And now I believe we should use zero here. By now XenGT does not
support PoD and here all we care about is whether the p2m type of this
gfn is p2m_mmio_write_dm.

>>>> @@ -2642,6 +2656,11 @@ struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>>>>                }
>>>>
>>>>                break;
>>>> +        case HVMOP_IO_RANGE_WP_MEM:
>>>> +            if ( rangeset_contains_singleton(r, PFN_DOWN(addr)) )
>>>> +                return s;
>>>
>>> Considering you've got p2m_mmio_write_dm above - can this
>>> validly return false here?
>>
>> Well, if we have multiple ioreq servers defined, it will...
>
> Ah, right. That's fine then.
>
> Jan
>
>

B.R.
Yu
Jan Beulich Jan. 27, 2016, 10:28 a.m. UTC | #5
>>> On 27.01.16 at 08:02, <yu.c.zhang@linux.intel.com> wrote:

> 
> On 1/26/2016 7:24 PM, Jan Beulich wrote:
>>>>> On 26.01.16 at 08:59, <yu.c.zhang@linux.intel.com> wrote:
>>
>>>
>>> On 1/22/2016 7:43 PM, Jan Beulich wrote:
>>>>>>> On 22.01.16 at 04:20, <yu.c.zhang@linux.intel.com> wrote:
>>>>> @@ -2601,6 +2605,16 @@ struct hvm_ioreq_server
>>> *hvm_select_ioreq_server(struct domain *d,
>>>>>            type = (p->type == IOREQ_TYPE_PIO) ?
>>>>>                    HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
>>>>>            addr = p->addr;
>>>>> +        if ( type == HVMOP_IO_RANGE_MEMORY )
>>>>> +        {
>>>>> +             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
>>>>> +                                          &p2mt, P2M_UNSHARE);
>>>>
>>>> It seems to me like I had asked before: Why P2M_UNSHARE instead
>>>> of just P2M_QUERY? (This could surely be fixed up while committing,
>>>> the more that I've already done some cleanup here, but I'd like to
>>>> understand this before it goes in.)
>>>>
>>> Hah, sorry for my bad memory. :)
>>> I did not found P2M_QUERY; only P2M_UNSHARE and P2M_ALLOC are
>>> defined. But after reading the code in ept_get_entry(), I guess the
>>> P2M_UNSHARE is not accurate, maybe I should use 0 here for the
>>> p2m_query_t parameter in get_page_from_gfn()?
>>
>> Ah, sorry for the misnamed suggestion. I'm not sure whether using
>> zero here actually matches your needs; P2M_UNSHARE though
>> seems odd in any case, so at least switching to P2M_ALLOC (to
>> populate PoD pages) would seem to be necessary.
>>
> 
> Thanks, Jan.  :)
> And now I believe we should use zero here. By now XenGT does not
> support PoD and here all we care about is whether the p2m type of this
> gfn is p2m_mmio_write_dm.

Okay, fine then (if, of course, it works).

Jan
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 079cad0..036c72d 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2023,6 +2023,37 @@  int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch,
                                             int is_mmio,
                                             uint64_t start,
                                             uint64_t end);
+/**
+ * This function registers a range of write-protected memory for emulation.
+ *
+ * @parm xch a handle to an open hypervisor interface.
+ * @parm domid the domain id to be serviced
+ * @parm id the IOREQ Server id.
+ * @parm start start of range
+ * @parm end end of range (inclusive).
+ * @return 0 on success, -1 on failure.
+ */
+int xc_hvm_map_wp_mem_range_to_ioreq_server(xc_interface *xch,
+                                            domid_t domid,
+                                            ioservid_t id,
+                                            xen_pfn_t start,
+                                            xen_pfn_t end);
+
+/**
+ * This function deregisters a range of write-protected memory for emulation.
+ *
+ * @parm xch a handle to an open hypervisor interface.
+ * @parm domid the domain id to be serviced
+ * @parm id the IOREQ Server id.
+ * @parm start start of range
+ * @parm end end of range (inclusive).
+ * @return 0 on success, -1 on failure.
+ */
+int xc_hvm_unmap_wp_mem_range_from_ioreq_server(xc_interface *xch,
+                                                domid_t domid,
+                                                ioservid_t id,
+                                                xen_pfn_t start,
+                                                xen_pfn_t end);
 
 /**
  * This function registers a PCI device for config space emulation.
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 99e0d48..4f43695 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1544,6 +1544,67 @@  int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t domid,
     return rc;
 }
 
+int xc_hvm_map_wp_mem_range_to_ioreq_server(xc_interface *xch,
+                                            domid_t domid,
+                                            ioservid_t id,
+                                            xen_pfn_t start,
+                                            xen_pfn_t end)
+{
+    DECLARE_HYPERCALL;
+    DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg);
+    int rc;
+
+    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
+    if ( arg == NULL )
+        return -1;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server;
+    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
+
+    arg->domid = domid;
+    arg->id = id;
+    arg->type = HVMOP_IO_RANGE_WP_MEM;
+    arg->start = start;
+    arg->end = end;
+
+    rc = do_xen_hypercall(xch, &hypercall);
+
+    xc_hypercall_buffer_free(xch, arg);
+    return rc;
+}
+
+int xc_hvm_unmap_wp_mem_range_from_ioreq_server(xc_interface *xch,
+                                                domid_t domid,
+                                                ioservid_t id,
+                                                xen_pfn_t start,
+                                                xen_pfn_t end)
+{
+    DECLARE_HYPERCALL;
+    DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg);
+    int rc;
+
+    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
+    if ( arg == NULL )
+        return -1;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server;
+    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
+
+    arg->domid = domid;
+    arg->id = id;
+    arg->type = HVMOP_IO_RANGE_WP_MEM;
+    arg->start = start;
+    arg->end = end;
+
+    rc = do_xen_hypercall(xch, &hypercall);
+
+    xc_hypercall_buffer_free(xch, arg);
+    return rc;
+
+}
+
 int xc_hvm_map_pcidev_to_ioreq_server(xc_interface *xch, domid_t domid,
                                       ioservid_t id, uint16_t segment,
                                       uint8_t bus, uint8_t device,
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 674feea..53d38e7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -932,6 +932,9 @@  static void hvm_ioreq_server_free_rangesets(struct hvm_ioreq_server *s,
         rangeset_destroy(s->range[i]);
 }
 
+const char *io_range_name[NR_IO_RANGE_TYPES] =
+                                {"port", "mmio", "pci", "wp-mem"};
+
 static int hvm_ioreq_server_alloc_rangesets(struct hvm_ioreq_server *s, 
                                             bool_t is_default)
 {
@@ -946,10 +949,7 @@  static int hvm_ioreq_server_alloc_rangesets(struct hvm_ioreq_server *s,
         char *name;
 
         rc = asprintf(&name, "ioreq_server %d %s", s->id,
-                      (i == HVMOP_IO_RANGE_PORT) ? "port" :
-                      (i == HVMOP_IO_RANGE_MEMORY) ? "memory" :
-                      (i == HVMOP_IO_RANGE_PCI) ? "pci" :
-                      "");
+                      (i < NR_IO_RANGE_TYPES) ? io_range_name[i] : "");
         if ( rc )
             goto fail;
 
@@ -1267,6 +1267,7 @@  static int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
             case HVMOP_IO_RANGE_PORT:
             case HVMOP_IO_RANGE_MEMORY:
             case HVMOP_IO_RANGE_PCI:
+            case HVMOP_IO_RANGE_WP_MEM:
                 r = s->range[type];
                 break;
 
@@ -1318,6 +1319,7 @@  static int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
             case HVMOP_IO_RANGE_PORT:
             case HVMOP_IO_RANGE_MEMORY:
             case HVMOP_IO_RANGE_PCI:
+            case HVMOP_IO_RANGE_WP_MEM:
                 r = s->range[type];
                 break;
 
@@ -2558,6 +2560,8 @@  struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
     uint32_t cf8;
     uint8_t type;
     uint64_t addr;
+    p2m_type_t p2mt;
+    struct page_info *ram_page;
 
     if ( list_empty(&d->arch.hvm_domain.ioreq_server.list) )
         return NULL;
@@ -2601,6 +2605,16 @@  struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
         type = (p->type == IOREQ_TYPE_PIO) ?
                 HVMOP_IO_RANGE_PORT : HVMOP_IO_RANGE_MEMORY;
         addr = p->addr;
+        if ( type == HVMOP_IO_RANGE_MEMORY )
+        {
+             ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
+                                          &p2mt, P2M_UNSHARE);
+             if ( p2mt == p2m_mmio_write_dm )
+                 type = HVMOP_IO_RANGE_WP_MEM;
+
+             if ( ram_page )
+                 put_page(ram_page);
+        }
     }
 
     list_for_each_entry ( s,
@@ -2642,6 +2656,11 @@  struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
             }
 
             break;
+        case HVMOP_IO_RANGE_WP_MEM:
+            if ( rangeset_contains_singleton(r, PFN_DOWN(addr)) )
+                return s;
+
+            break;
         }
     }
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index a8cc2ad..1e13973 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -48,7 +48,7 @@  struct hvm_ioreq_vcpu {
     bool_t           pending;
 };
 
-#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
+#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_WP_MEM + 1)
 #define MAX_NR_IO_RANGES  256
 
 struct hvm_ioreq_server {
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index 1606185..c0b1e30 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -333,6 +333,7 @@  struct xen_hvm_io_range {
 # define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
 # define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
 # define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range */
+# define HVMOP_IO_RANGE_WP_MEM 3 /* Write-protected ram range */
     uint64_aligned_t start, end; /* IN - inclusive start and end of range */
 };
 typedef struct xen_hvm_io_range xen_hvm_io_range_t;