@@ -325,57 +325,83 @@ long arch_do_domctl(
case XEN_DOMCTL_getmemlist:
{
- int i;
+#define XEN_DOMCTL_getmemlist_max_pfns (GB(1) / PAGE_SIZE)
+ unsigned int i = 0, idx = 0;
unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
+ unsigned long index = domctl->u.getmemlist.index;
uint64_t mfn;
struct page_info *page;
+ uint64_t *mfns;
if ( unlikely(d->is_dying) ) {
ret = -EINVAL;
break;
}
+ /* XSA-74: This sub-hypercall is fixed. */
- /*
- * XSA-74: This sub-hypercall is broken in several ways:
- * - lock order inversion (p2m locks inside page_alloc_lock)
- * - no preemption on huge max_pfns input
- * - not (re-)checking d->is_dying with page_alloc_lock held
- * - not honoring start_pfn input (which libxc also doesn't set)
- * Additionally it is rather useless, as the result is stale by the
- * time the caller gets to look at it.
- * As it only has a single, non-production consumer (xen-mceinj),
- * rather than trying to fix it we restrict it for the time being.
- */
- if ( /* No nested locks inside copy_to_guest_offset(). */
- paging_mode_external(current->domain) ||
- /* Arbitrary limit capping processing time. */
- max_pfns > GB(4) / PAGE_SIZE )
+ ret = -E2BIG;
+ if ( max_pfns > XEN_DOMCTL_getmemlist_max_pfns )
+ max_pfns = XEN_DOMCTL_getmemlist_max_pfns;
+
+        /* Caller is probing: report the maximum number of PFNs we allow. */
+ if ( !max_pfns && guest_handle_is_null(domctl->u.getmemlist.buffer) )
{
- ret = -EOPNOTSUPP;
+ domctl->u.getmemlist.max_pfns = XEN_DOMCTL_getmemlist_max_pfns;
+ copyback = 1;
break;
}
- spin_lock(&d->page_alloc_lock);
+ ret = -EINVAL;
+ if ( !guest_handle_okay(domctl->u.getmemlist.buffer, max_pfns) )
+ break;
+
+ mfns = xmalloc_array(uint64_t, max_pfns);
+ if ( !mfns )
+ {
+ ret = -ENOMEM;
+ break;
+ }
- ret = i = 0;
+ ret = -EINVAL;
+ spin_lock(&d->page_alloc_lock);
page_list_for_each(page, &d->page_list)
{
- if ( i >= max_pfns )
+ if ( idx >= max_pfns )
break;
+
+ if ( index > i++ )
+ continue;
+
+ if ( idx && !(idx & 0xFF) && hypercall_preempt_check() )
+ break;
+
mfn = page_to_mfn(page);
- if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
- i, &mfn, 1) )
+ mfns[idx++] = mfn;
+ }
+ spin_unlock(&d->page_alloc_lock);
+
+ ret = 0;
+ for ( i = 0; i < idx; i++ )
+ {
+
+ if ( __copy_to_guest_offset(domctl->u.getmemlist.buffer,
+ i, &mfns[i], 1) )
{
ret = -EFAULT;
break;
}
- ++i;
}
- spin_unlock(&d->page_alloc_lock);
-
domctl->u.getmemlist.num_pfns = i;
+        /*
+         * A poor man's way of keeping track of P2M changes: whenever the
+         * P2M is updated the version changes as well, so the caller can
+         * detect stale data and redo its list.
+         */
+ domctl->u.getmemlist.version = p2m_get_hostp2m(d)->version;
+
copyback = 1;
+ xfree(mfns);
}
break;
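A minimal caller-side sketch of the probe path above (not part of the patch): calling with max_pfns == 0 and a NULL buffer makes the hypercall fail with -E2BIG while reporting the batch size the hypervisor recommends. The getmemlist_probe() wrapper and its negative-errno return convention are assumptions standing in for whatever toolstack binding actually issues XEN_DOMCTL_getmemlist.

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Hypothetical wrapper: issues XEN_DOMCTL_getmemlist for 'domid' with
     * max_pfns == 0 and a NULL buffer, returning the (negative) hypercall
     * result and storing the recommended batch size in '*max'. */
    extern int getmemlist_probe(int domid, uint64_t *max);

    /* Allocate a buffer sized to the hypervisor's recommended max_pfns. */
    static uint64_t *alloc_mfn_buffer(int domid, uint64_t *out_max)
    {
        uint64_t max = 0;

        /* The probe is expected to "fail" with -E2BIG while filling in max. */
        if ( getmemlist_probe(domid, &max) != -E2BIG || max == 0 )
            return NULL;

        *out_max = max;
        return calloc(max, sizeof(uint64_t));
    }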
@@ -709,6 +709,7 @@ hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
if ( old_flags & _PAGE_PRESENT )
flush_tlb_mask(d->domain_dirty_cpumask);
+ p2m_get_hostp2m(d)->version++;
paging_unlock(d);
if ( flush_nestedp2m )
@@ -674,6 +674,8 @@ void ept_sync_domain(struct p2m_domain *p2m)
{
struct domain *d = p2m->domain;
struct ept_data *ept = &p2m->ept;
+
+ p2m->version++;
/* Only if using EPT and this domain has some VCPUs to dirty. */
if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
return;
@@ -293,6 +293,8 @@ struct p2m_domain {
struct ept_data ept;
/* NPT-equivalent structure could be added here. */
};
+    /* OVM: Every update to the P2M increments this version. */
+ unsigned long version;
};
/* get host p2m table */
@@ -118,16 +118,36 @@ typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
-/* XEN_DOMCTL_getmemlist */
+/*
+ * XEN_DOMCTL_getmemlist
+ * Retrieve an array of the guest's MFNs.
+ *
+ * If the hypercall returns zero, it has copied 'num_pfns' MFNs (up to
+ * 'max_pfns') into 'buffer' and updated 'version'.  The version may be
+ * the same across hypercalls.  If it differs, the data is stale and it
+ * is recommended that the caller restart with 'index' set to zero.
+ *
+ * If 'max_pfns' is zero and 'buffer' is NULL, the hypercall returns
+ * -E2BIG and updates 'max_pfns' with the recommended value to use.
+ *
+ * Note that due to the asynchronous nature of hypercalls the domain
+ * might have added or removed pages, making this information stale.
+ * It is the responsibility of the toolstack to compare the 'version'
+ * field between invocations; if the version differs, it should discard
+ * the stale data and start from scratch.  It is fine for the toolstack
+ * to adopt the newly returned 'version' as its baseline when it
+ * restarts.
+ */
struct xen_domctl_getmemlist {
- /* IN variables. */
- /* Max entries to write to output buffer. */
+    /* IN/OUT: Max entries to write to output buffer. If max_pfns is zero
+     * and buffer is NULL, this returns the recommended max buffer size. */
uint64_aligned_t max_pfns;
- /* Start index in guest's page list. */
- uint64_aligned_t start_pfn;
- XEN_GUEST_HANDLE_64(uint64) buffer;
- /* OUT variables. */
- uint64_aligned_t num_pfns;
+ uint64_aligned_t index; /* IN: Start index in guest's page list. */
+    XEN_GUEST_HANDLE_64(uint64) buffer; /* IN: If NULL with max_pfns == 0, then
+                                          * max_pfns has the recommended value. */
+    uint64_aligned_t version;          /* OUT: P2M version; if it differs
+                                        * between calls, prior data is stale. */
+    uint64_aligned_t num_pfns;         /* OUT: Number of MFNs copied (up to max_pfns). */
};
typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
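To illustrate the protocol described in the comment above, here is a hedged caller-side sketch of the batched iteration: advance 'index' after each call and restart from zero whenever the returned 'version' changes. getmemlist_chunk() is a hypothetical wrapper, not an existing libxc call; its signature and negative-errno return are assumptions.

    #include <stdint.h>

    /* Hypothetical wrapper: copies up to 'max' MFNs starting at page-list
     * position 'index' into 'buf', storing the count copied in '*num' and
     * the P2M version in '*version'.  Returns 0 on success, a negative
     * errno value otherwise. */
    extern int getmemlist_chunk(int domid, uint64_t index, uint64_t max,
                                uint64_t *buf, uint64_t *num, uint64_t *version);

    /* Fill 'buf' (capacity 'max' entries) with the guest's MFNs, restarting
     * whenever the P2M version changes under our feet. */
    static int fetch_all_mfns(int domid, uint64_t *buf, uint64_t max)
    {
        uint64_t index, num, version, baseline;
        int rc;

     restart:
        index = 0;
        baseline = 0;

        do {
            rc = getmemlist_chunk(domid, index, max - index, buf + index,
                                  &num, &version);
            if ( rc < 0 )
                return rc;

            if ( index == 0 )
                baseline = version;   /* First batch sets the baseline. */
            else if ( version != baseline )
                goto restart;         /* P2M changed: data so far is stale. */

            index += num;
        } while ( num != 0 && index < max );

        return 0;
    }

The restart-on-version-change step is the behaviour the 'version' field is meant to enable; a real toolstack would also want to bound the number of restarts for a very busy domain.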