Message ID | 20231115071519.2864957-10-xiaoyao.li@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | QEMU Guest memfd + QEMU TDX support | expand |
On Wed, Nov 15, 2023 at 02:14:18AM -0500, Xiaoyao Li <xiaoyao.li@intel.com> wrote: > It's used for discarding opposite memory after memory conversion, for > confidential guest. > > When page is converted from shared to private, the original shared > memory can be discarded via ram_block_discard_range(); > > When page is converted from private to shared, the original private > memory is back'ed by guest_memfd. Introduce > ram_block_discard_guest_memfd_range() for discarding memory in > guest_memfd. > > Originally-from: Isaku Yamahata <isaku.yamahata@intel.com> > Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com> > Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> > --- > include/exec/cpu-common.h | 2 ++ > system/physmem.c | 50 +++++++++++++++++++++++++++++++++++++++ > 2 files changed, 52 insertions(+) > > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index 41115d891940..de728a18eef2 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); > > int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); > int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); > +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, > + bool shared_to_private); > > #endif > > diff --git a/system/physmem.c b/system/physmem.c > index ddfecddefcd6..cd6008fa09ad 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -3641,6 +3641,29 @@ err: > return ret; > } > > +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, > + size_t length) > +{ > + int ret = -1; > + > +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE > + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > + start, length); > + > + if (ret) { > + ret = -errno; > + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", > + __func__, rb->idstr, start, length, ret); > + } > +#else > + ret = -ENOSYS; > + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", > + __func__, rb->idstr, start, length, ret); > +#endif > + > + return ret; > +} > + > bool ramblock_is_pmem(RAMBlock *rb) > { > return rb->flags & RAM_PMEM; > @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void) > return qatomic_read(&ram_block_discard_required_cnt) || > qatomic_read(&ram_block_coordinated_discard_required_cnt); > } > + > +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, > + bool shared_to_private) > +{ > + if (!rb || rb->guest_memfd < 0) { > + return -1; > + } > + > + if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) || > + !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) { > + return -1; > + } > + > + if (!length) { > + return -1; > + } > + > + if (start + length > rb->max_length) { > + return -1; > + } > + > + if (shared_to_private) { > + return ram_block_discard_range(rb, start, length); > + } else { > + return ram_block_discard_guest_memfd_range(rb, start, length); > + } > +} Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name mad sense. But now it doesn't, and it issues only punch hole. We should rename it to represent what it actually does. discard_range?
On 11/18/2023 5:03 AM, Isaku Yamahata wrote: > On Wed, Nov 15, 2023 at 02:14:18AM -0500, > Xiaoyao Li <xiaoyao.li@intel.com> wrote: > >> It's used for discarding opposite memory after memory conversion, for >> confidential guest. >> >> When page is converted from shared to private, the original shared >> memory can be discarded via ram_block_discard_range(); >> >> When page is converted from private to shared, the original private >> memory is back'ed by guest_memfd. Introduce >> ram_block_discard_guest_memfd_range() for discarding memory in >> guest_memfd. >> >> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com> >> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com> >> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> >> --- >> include/exec/cpu-common.h | 2 ++ >> system/physmem.c | 50 +++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 52 insertions(+) >> >> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h >> index 41115d891940..de728a18eef2 100644 >> --- a/include/exec/cpu-common.h >> +++ b/include/exec/cpu-common.h >> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); >> >> int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); >> int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); >> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, >> + bool shared_to_private); >> >> #endif >> >> diff --git a/system/physmem.c b/system/physmem.c >> index ddfecddefcd6..cd6008fa09ad 100644 >> --- a/system/physmem.c >> +++ b/system/physmem.c >> @@ -3641,6 +3641,29 @@ err: >> return ret; >> } >> >> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, >> + size_t length) >> +{ >> + int ret = -1; >> + >> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE >> + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, >> + start, length); >> + >> + if (ret) { >> + ret = -errno; >> + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", >> + __func__, rb->idstr, start, length, ret); >> + } >> +#else >> + ret = -ENOSYS; >> + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", >> + __func__, rb->idstr, start, length, ret); >> +#endif >> + >> + return ret; >> +} >> + >> bool ramblock_is_pmem(RAMBlock *rb) >> { >> return rb->flags & RAM_PMEM; >> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void) >> return qatomic_read(&ram_block_discard_required_cnt) || >> qatomic_read(&ram_block_coordinated_discard_required_cnt); >> } >> + >> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, >> + bool shared_to_private) >> +{ >> + if (!rb || rb->guest_memfd < 0) { >> + return -1; >> + } >> + >> + if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) || >> + !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) { >> + return -1; >> + } >> + >> + if (!length) { >> + return -1; >> + } >> + >> + if (start + length > rb->max_length) { >> + return -1; >> + } >> + >> + if (shared_to_private) { >> + return ram_block_discard_range(rb, start, length); >> + } else { >> + return ram_block_discard_guest_memfd_range(rb, start, length); >> + } >> +} > > Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name > mad sense. But now it doesn't, and it issues only punch hole. We should rename > it to represent what it actually does. discard_range? ram_block_discard_range() already exists for non-guest-memfd memory discard. I cannot come up with a proper name. e.g., ram_block_discard_opposite_range() while *opposite* seems unclear. Do you have any better idea?
On 08.12.23 08:59, Xiaoyao Li wrote: > On 11/18/2023 5:03 AM, Isaku Yamahata wrote: >> On Wed, Nov 15, 2023 at 02:14:18AM -0500, >> Xiaoyao Li <xiaoyao.li@intel.com> wrote: >> >>> It's used for discarding opposite memory after memory conversion, for >>> confidential guest. >>> >>> When page is converted from shared to private, the original shared >>> memory can be discarded via ram_block_discard_range(); >>> >>> When page is converted from private to shared, the original private >>> memory is back'ed by guest_memfd. Introduce >>> ram_block_discard_guest_memfd_range() for discarding memory in >>> guest_memfd. >>> >>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com> >>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com> >>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> >>> --- >>> include/exec/cpu-common.h | 2 ++ >>> system/physmem.c | 50 +++++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 52 insertions(+) >>> >>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h >>> index 41115d891940..de728a18eef2 100644 >>> --- a/include/exec/cpu-common.h >>> +++ b/include/exec/cpu-common.h >>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); >>> >>> int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); >>> int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); >>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, >>> + bool shared_to_private); >>> >>> #endif >>> >>> diff --git a/system/physmem.c b/system/physmem.c >>> index ddfecddefcd6..cd6008fa09ad 100644 >>> --- a/system/physmem.c >>> +++ b/system/physmem.c >>> @@ -3641,6 +3641,29 @@ err: >>> return ret; >>> } >>> >>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, >>> + size_t length) >>> +{ >>> + int ret = -1; >>> + >>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE >>> + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, >>> + start, length); >>> + >>> + if (ret) { >>> + ret = -errno; >>> + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", >>> + __func__, rb->idstr, start, length, ret); >>> + } >>> +#else >>> + ret = -ENOSYS; >>> + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", >>> + __func__, rb->idstr, start, length, ret); >>> +#endif >>> + >>> + return ret; >>> +} >>> + >>> bool ramblock_is_pmem(RAMBlock *rb) >>> { >>> return rb->flags & RAM_PMEM; >>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void) >>> return qatomic_read(&ram_block_discard_required_cnt) || >>> qatomic_read(&ram_block_coordinated_discard_required_cnt); >>> } >>> + >>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, >>> + bool shared_to_private) >>> +{ >>> + if (!rb || rb->guest_memfd < 0) { >>> + return -1; >>> + } >>> + >>> + if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) || >>> + !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) { >>> + return -1; >>> + } >>> + >>> + if (!length) { >>> + return -1; >>> + } >>> + >>> + if (start + length > rb->max_length) { >>> + return -1; >>> + } >>> + >>> + if (shared_to_private) { >>> + return ram_block_discard_range(rb, start, length); >>> + } else { >>> + return ram_block_discard_guest_memfd_range(rb, start, length); >>> + } >>> +} >> >> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name >> mad sense. But now it doesn't, and it issues only punch hole. We should rename >> it to represent what it actually does. discard_range? > > ram_block_discard_range() already exists for non-guest-memfd memory discard. > > I cannot come up with a proper name. e.g., > ram_block_discard_opposite_range() while *opposite* seems unclear. > > Do you have any better idea? Having some indication that this is about "guest_memfd" back and forth switching/conversion will make sense. But I'm also not able to come up with a better name. Maybe have two functions: ram_block_activate_guest_memfd_range ram_block_deactivate_guest_memfd_range
On 12/8/2023 7:52 PM, David Hildenbrand wrote: > On 08.12.23 08:59, Xiaoyao Li wrote: >> On 11/18/2023 5:03 AM, Isaku Yamahata wrote: >>> On Wed, Nov 15, 2023 at 02:14:18AM -0500, >>> Xiaoyao Li <xiaoyao.li@intel.com> wrote: >>> >>>> It's used for discarding opposite memory after memory conversion, for >>>> confidential guest. >>>> >>>> When page is converted from shared to private, the original shared >>>> memory can be discarded via ram_block_discard_range(); >>>> >>>> When page is converted from private to shared, the original private >>>> memory is back'ed by guest_memfd. Introduce >>>> ram_block_discard_guest_memfd_range() for discarding memory in >>>> guest_memfd. >>>> >>>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com> >>>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com> >>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> >>>> --- >>>> include/exec/cpu-common.h | 2 ++ >>>> system/physmem.c | 50 >>>> +++++++++++++++++++++++++++++++++++++++ >>>> 2 files changed, 52 insertions(+) >>>> >>>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h >>>> index 41115d891940..de728a18eef2 100644 >>>> --- a/include/exec/cpu-common.h >>>> +++ b/include/exec/cpu-common.h >>>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, >>>> void *opaque); >>>> int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); >>>> int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t >>>> length); >>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t >>>> length, >>>> + bool shared_to_private); >>>> #endif >>>> diff --git a/system/physmem.c b/system/physmem.c >>>> index ddfecddefcd6..cd6008fa09ad 100644 >>>> --- a/system/physmem.c >>>> +++ b/system/physmem.c >>>> @@ -3641,6 +3641,29 @@ err: >>>> return ret; >>>> } >>>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, >>>> uint64_t start, >>>> + size_t length) >>>> +{ >>>> + int ret = -1; >>>> + >>>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE >>>> + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | >>>> FALLOC_FL_KEEP_SIZE, >>>> + start, length); >>>> + >>>> + if (ret) { >>>> + ret = -errno; >>>> + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx >>>> (%d)", >>>> + __func__, rb->idstr, start, length, ret); >>>> + } >>>> +#else >>>> + ret = -ENOSYS; >>>> + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx >>>> (%d)", >>>> + __func__, rb->idstr, start, length, ret); >>>> +#endif >>>> + >>>> + return ret; >>>> +} >>>> + >>>> bool ramblock_is_pmem(RAMBlock *rb) >>>> { >>>> return rb->flags & RAM_PMEM; >>>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void) >>>> return qatomic_read(&ram_block_discard_required_cnt) || >>>> >>>> qatomic_read(&ram_block_coordinated_discard_required_cnt); >>>> } >>>> + >>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t >>>> length, >>>> + bool shared_to_private) >>>> +{ >>>> + if (!rb || rb->guest_memfd < 0) { >>>> + return -1; >>>> + } >>>> + >>>> + if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) || >>>> + !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) { >>>> + return -1; >>>> + } >>>> + >>>> + if (!length) { >>>> + return -1; >>>> + } >>>> + >>>> + if (start + length > rb->max_length) { >>>> + return -1; >>>> + } >>>> + >>>> + if (shared_to_private) { >>>> + return ram_block_discard_range(rb, start, length); >>>> + } else { >>>> + return ram_block_discard_guest_memfd_range(rb, start, length); >>>> + } >>>> +} >>> >>> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the >>> function name >>> mad sense. But now it doesn't, and it issues only punch hole. We >>> should rename >>> it to represent what it actually does. discard_range? >> >> ram_block_discard_range() already exists for non-guest-memfd memory >> discard. >> >> I cannot come up with a proper name. e.g., >> ram_block_discard_opposite_range() while *opposite* seems unclear. >> >> Do you have any better idea? > > Having some indication that this is about "guest_memfd" back and forth > switching/conversion will make sense. But I'm also not able to come up > with a better name. > > Maybe have two functions: > > ram_block_activate_guest_memfd_range > ram_block_deactivate_guest_memfd_range > finally, I decide to drop this function and expose ram_block_discard_guest_memfd_range() instead. So caller can call the ram_block_discard_*() on its own.
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 41115d891940..de728a18eef2 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, + bool shared_to_private); #endif diff --git a/system/physmem.c b/system/physmem.c index ddfecddefcd6..cd6008fa09ad 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -3641,6 +3641,29 @@ err: return ret; } +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length) +{ + int ret = -1; + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); + + if (ret) { + ret = -errno; + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); + } +#else + ret = -ENOSYS; + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); +#endif + + return ret; +} + bool ramblock_is_pmem(RAMBlock *rb) { return rb->flags & RAM_PMEM; @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void) return qatomic_read(&ram_block_discard_required_cnt) || qatomic_read(&ram_block_coordinated_discard_required_cnt); } + +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length, + bool shared_to_private) +{ + if (!rb || rb->guest_memfd < 0) { + return -1; + } + + if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) || + !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) { + return -1; + } + + if (!length) { + return -1; + } + + if (start + length > rb->max_length) { + return -1; + } + + if (shared_to_private) { + return ram_block_discard_range(rb, start, length); + } else { + return ram_block_discard_guest_memfd_range(rb, start, length); + } +}
It's used for discarding opposite memory after memory conversion, for confidential guest. When page is converted from shared to private, the original shared memory can be discarded via ram_block_discard_range(); When page is converted from private to shared, the original private memory is back'ed by guest_memfd. Introduce ram_block_discard_guest_memfd_range() for discarding memory in guest_memfd. Originally-from: Isaku Yamahata <isaku.yamahata@intel.com> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> --- include/exec/cpu-common.h | 2 ++ system/physmem.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+)