diff mbox series

[v2,fixed,08/16] util/mmap-alloc: Factor out calculation of pagesize to mmap_pagesize()

Message ID 20200212134254.11073-9-david@redhat.com (mailing list archive)
State New, archived
Headers show
Series Ram blocks with resizable anonymous allocations under POSIX | expand

Commit Message

David Hildenbrand Feb. 12, 2020, 1:42 p.m. UTC
Factor it out and add a comment.

Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Cc: Greg Kurz <groug@kaod.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 util/mmap-alloc.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

Comments

Peter Xu Feb. 19, 2020, 10:46 p.m. UTC | #1
On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
> Factor it out and add a comment.
> 
> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> Cc: Greg Kurz <groug@kaod.org>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  util/mmap-alloc.c | 21 ++++++++++++---------
>  1 file changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 27dcccd8ec..82f02a2cec 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>      return qemu_real_host_page_size;
>  }
>  
> +static inline size_t mmap_pagesize(int fd)
> +{
> +#if defined(__powerpc64__) && defined(__linux__)
> +    /* Mappings in the same segment must share the same page size */
> +    return qemu_fd_getpagesize(fd);
> +#else
> +    return qemu_real_host_page_size;
> +#endif
> +}

Pure question: This will return 4K even for huge pages on x86, is this
what we want?

This is of course not related to this specific patch which still
follows the old code, but I'm thinking whether it was intended or not
even in the old code (or is there anything to do with the
MAP_NORESERVE fix for ppc64 huge pages?).  Do you know the answer?

Thanks,

> +
>  void *qemu_ram_mmap(int fd,
>                      size_t size,
>                      size_t align,
>                      bool shared,
>                      bool is_pmem)
>  {
> +    const size_t pagesize = mmap_pagesize(fd);
>      int flags;
>      int map_sync_flags = 0;
>      int guardfd;
>      size_t offset;
> -    size_t pagesize;
>      size_t total;
>      void *guardptr;
>      void *ptr;
> @@ -113,7 +123,6 @@ void *qemu_ram_mmap(int fd,
>       * anonymous memory is OK.
>       */
>      flags = MAP_PRIVATE;
> -    pagesize = qemu_fd_getpagesize(fd);
>      if (fd == -1 || pagesize == qemu_real_host_page_size) {
>          guardfd = -1;
>          flags |= MAP_ANONYMOUS;
> @@ -123,7 +132,6 @@ void *qemu_ram_mmap(int fd,
>      }
>  #else
>      guardfd = -1;
> -    pagesize = qemu_real_host_page_size;
>      flags = MAP_PRIVATE | MAP_ANONYMOUS;
>  #endif
>  
> @@ -198,15 +206,10 @@ void *qemu_ram_mmap(int fd,
>  
>  void qemu_ram_munmap(int fd, void *ptr, size_t size)
>  {
> -    size_t pagesize;
> +    const size_t pagesize = mmap_pagesize(fd);
>  
>      if (ptr) {
>          /* Unmap both the RAM block and the guard page */
> -#if defined(__powerpc64__) && defined(__linux__)
> -        pagesize = qemu_fd_getpagesize(fd);
> -#else
> -        pagesize = qemu_real_host_page_size;
> -#endif
>          munmap(ptr, size + pagesize);
>      }
>  }
> -- 
> 2.24.1
> 
>
David Hildenbrand Feb. 24, 2020, 10:50 a.m. UTC | #2
On 19.02.20 23:46, Peter Xu wrote:
> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
>> Factor it out and add a comment.
>>
>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>> Cc: Greg Kurz <groug@kaod.org>
>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>> Cc: Igor Mammedov <imammedo@redhat.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>  util/mmap-alloc.c | 21 ++++++++++++---------
>>  1 file changed, 12 insertions(+), 9 deletions(-)
>>
>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>> index 27dcccd8ec..82f02a2cec 100644
>> --- a/util/mmap-alloc.c
>> +++ b/util/mmap-alloc.c
>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>>      return qemu_real_host_page_size;
>>  }
>>  
>> +static inline size_t mmap_pagesize(int fd)
>> +{
>> +#if defined(__powerpc64__) && defined(__linux__)
>> +    /* Mappings in the same segment must share the same page size */
>> +    return qemu_fd_getpagesize(fd);
>> +#else
>> +    return qemu_real_host_page_size;
>> +#endif
>> +}
> 
> Pure question: This will return 4K even for huge pages on x86, is this
> what we want?

(was asking myself the same question) I *think* it's intended. It's
mainly only used to allocate one additional guard page. The callers of
qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
huge pages).

Of course, a 4k guard page is sufficient - unless we can't use that
(special case for ppc64 here).

Thanks!
David Hildenbrand Feb. 24, 2020, 10:57 a.m. UTC | #3
On 24.02.20 11:50, David Hildenbrand wrote:
> On 19.02.20 23:46, Peter Xu wrote:
>> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
>>> Factor it out and add a comment.
>>>
>>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
>>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>> Cc: Greg Kurz <groug@kaod.org>
>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>>> Cc: Igor Mammedov <imammedo@redhat.com>
>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>> ---
>>>  util/mmap-alloc.c | 21 ++++++++++++---------
>>>  1 file changed, 12 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>> index 27dcccd8ec..82f02a2cec 100644
>>> --- a/util/mmap-alloc.c
>>> +++ b/util/mmap-alloc.c
>>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>>>      return qemu_real_host_page_size;
>>>  }
>>>  
>>> +static inline size_t mmap_pagesize(int fd)
>>> +{
>>> +#if defined(__powerpc64__) && defined(__linux__)
>>> +    /* Mappings in the same segment must share the same page size */
>>> +    return qemu_fd_getpagesize(fd);
>>> +#else
>>> +    return qemu_real_host_page_size;
>>> +#endif
>>> +}
>>
>> Pure question: This will return 4K even for huge pages on x86, is this
>> what we want?
> 
> (was asking myself the same question) I *think* it's intended. It's
> mainly only used to allocate one additional guard page. The callers of
> qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
> huge pages).
> 
> Of course, a 4k guard page is sufficient - unless we can't use that
> (special case for ppc64 here).
> 
> Thanks!
> 

We could rename the function to mmap_guard_pagesize(), thoughts?
Murilo Opsfelder Araújo Feb. 24, 2020, 2:16 p.m. UTC | #4
On Monday, February 24, 2020 7:57:03 AM -03 David Hildenbrand wrote:
> On 24.02.20 11:50, David Hildenbrand wrote:
> > On 19.02.20 23:46, Peter Xu wrote:
> >> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
> >>> Factor it out and add a comment.
> >>>
> >>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
> >>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> >>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> >>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> >>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> >>> Cc: Greg Kurz <groug@kaod.org>
> >>> Cc: Eduardo Habkost <ehabkost@redhat.com>
> >>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >>> Cc: Igor Mammedov <imammedo@redhat.com>
> >>> Signed-off-by: David Hildenbrand <david@redhat.com>
> >>> ---
> >>>
> >>>  util/mmap-alloc.c | 21 ++++++++++++---------
> >>>  1 file changed, 12 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> >>> index 27dcccd8ec..82f02a2cec 100644
> >>> --- a/util/mmap-alloc.c
> >>> +++ b/util/mmap-alloc.c
> >>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char
> >>> *mem_path)
> >>>
> >>>      return qemu_real_host_page_size;
> >>>
> >>>  }
> >>>
> >>> +static inline size_t mmap_pagesize(int fd)
> >>> +{
> >>> +#if defined(__powerpc64__) && defined(__linux__)
> >>> +    /* Mappings in the same segment must share the same page size */
> >>> +    return qemu_fd_getpagesize(fd);
> >>> +#else
> >>> +    return qemu_real_host_page_size;
> >>> +#endif
> >>> +}
> >>
> >> Pure question: This will return 4K even for huge pages on x86, is this
> >> what we want?
> >
> > (was asking myself the same question) I *think* it's intended. It's
> > mainly only used to allocate one additional guard page. The callers of
> > qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
> > huge pages).
> >
> > Of course, a 4k guard page is sufficient - unless we can't use that
> > (special case for ppc64 here).
> >
> > Thanks!
>
> We could rename the function to mmap_guard_pagesize(), thoughts?

The existing qemu_fd_getpagesize() already returns qemu_real_host_page_size for
non-anonymous mappings (when fd == -1).  I think this new mmap_pagesize() could
be dropped in favor of qemu_fd_getpagesize().

A side effect of this change would be guard page using a bit more memory for
non-anonymous mapping.  Could that be a problem?

What do you think?

--
Murilo
Murilo Opsfelder Araújo Feb. 24, 2020, 2:25 p.m. UTC | #5
On Monday, February 24, 2020 11:16:16 AM -03 Murilo Opsfelder Araújo wrote:
> On Monday, February 24, 2020 7:57:03 AM -03 David Hildenbrand wrote:
> > On 24.02.20 11:50, David Hildenbrand wrote:
> > > On 19.02.20 23:46, Peter Xu wrote:
> > >> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
> > >>> Factor it out and add a comment.
> > >>>
> > >>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
> > >>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> > >>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> > >>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> > >>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> > >>> Cc: Greg Kurz <groug@kaod.org>
> > >>> Cc: Eduardo Habkost <ehabkost@redhat.com>
> > >>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > >>> Cc: Igor Mammedov <imammedo@redhat.com>
> > >>> Signed-off-by: David Hildenbrand <david@redhat.com>
> > >>> ---
> > >>>
> > >>>  util/mmap-alloc.c | 21 ++++++++++++---------
> > >>>  1 file changed, 12 insertions(+), 9 deletions(-)
> > >>>
> > >>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> > >>> index 27dcccd8ec..82f02a2cec 100644
> > >>> --- a/util/mmap-alloc.c
> > >>> +++ b/util/mmap-alloc.c
> > >>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char
> > >>> *mem_path)
> > >>>
> > >>>      return qemu_real_host_page_size;
> > >>>
> > >>>  }
> > >>>
> > >>> +static inline size_t mmap_pagesize(int fd)
> > >>> +{
> > >>> +#if defined(__powerpc64__) && defined(__linux__)
> > >>> +    /* Mappings in the same segment must share the same page size */
> > >>> +    return qemu_fd_getpagesize(fd);
> > >>> +#else
> > >>> +    return qemu_real_host_page_size;
> > >>> +#endif
> > >>> +}
> > >>
> > >> Pure question: This will return 4K even for huge pages on x86, is this
> > >> what we want?
> > >
> > > (was asking myself the same question) I *think* it's intended. It's
> > > mainly only used to allocate one additional guard page. The callers of
> > > qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
> > > huge pages).
> > >
> > > Of course, a 4k guard page is sufficient - unless we can't use that
> > > (special case for ppc64 here).
> > >
> > > Thanks!
> >
> > We could rename the function to mmap_guard_pagesize(), thoughts?
>
> The existing qemu_fd_getpagesize() already returns qemu_real_host_page_size
> for non-anonymous mappings (when fd == -1).  I think this new
> mmap_pagesize() could be dropped in favor of qemu_fd_getpagesize().

s/non-//

I mean "for anonymous mappings".

>
> A side effect of this change would be guard page using a bit more memory for
> non-anonymous mapping.  Could that be a problem?
>
> What do you think?
>
> --
> Murilo


--
Murilo
David Hildenbrand Feb. 24, 2020, 2:39 p.m. UTC | #6
On 24.02.20 15:25, Murilo Opsfelder Araújo wrote:
> On Monday, February 24, 2020 11:16:16 AM -03 Murilo Opsfelder Araújo wrote:
>> On Monday, February 24, 2020 7:57:03 AM -03 David Hildenbrand wrote:
>>> On 24.02.20 11:50, David Hildenbrand wrote:
>>>> On 19.02.20 23:46, Peter Xu wrote:
>>>>> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
>>>>>> Factor it out and add a comment.
>>>>>>
>>>>>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
>>>>>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>>>>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>>>>>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>>> Cc: Greg Kurz <groug@kaod.org>
>>>>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>>>>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>>>>>> Cc: Igor Mammedov <imammedo@redhat.com>
>>>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>>>> ---
>>>>>>
>>>>>>  util/mmap-alloc.c | 21 ++++++++++++---------
>>>>>>  1 file changed, 12 insertions(+), 9 deletions(-)
>>>>>>
>>>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>>>>> index 27dcccd8ec..82f02a2cec 100644
>>>>>> --- a/util/mmap-alloc.c
>>>>>> +++ b/util/mmap-alloc.c
>>>>>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char
>>>>>> *mem_path)
>>>>>>
>>>>>>      return qemu_real_host_page_size;
>>>>>>
>>>>>>  }
>>>>>>
>>>>>> +static inline size_t mmap_pagesize(int fd)
>>>>>> +{
>>>>>> +#if defined(__powerpc64__) && defined(__linux__)
>>>>>> +    /* Mappings in the same segment must share the same page size */
>>>>>> +    return qemu_fd_getpagesize(fd);
>>>>>> +#else
>>>>>> +    return qemu_real_host_page_size;
>>>>>> +#endif
>>>>>> +}
>>>>>
>>>>> Pure question: This will return 4K even for huge pages on x86, is this
>>>>> what we want?
>>>>
>>>> (was asking myself the same question) I *think* it's intended. It's
>>>> mainly only used to allocate one additional guard page. The callers of
>>>> qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
>>>> huge pages).
>>>>
>>>> Of course, a 4k guard page is sufficient - unless we can't use that
>>>> (special case for ppc64 here).
>>>>
>>>> Thanks!
>>>
>>> We could rename the function to mmap_guard_pagesize(), thoughts?
>>
>> The existing qemu_fd_getpagesize() already returns qemu_real_host_page_size
>> for non-anonymous mappings (when fd == -1).  I think this new
>> mmap_pagesize() could be dropped in favor of qemu_fd_getpagesize().
> 
> s/non-//
> 
> I mean "for anonymous mappings".
> 
>>
>> A side effect of this change would be guard page using a bit more memory for
>> non-anonymous mapping.  Could that be a problem?
>>
>> What do you think?

At least under Linux it won't be an issue I guess. The same is probably
true for other OSs as well - the memory is not even readable, so why
should it consume memory? So it will only consume space in the virtual
address space.

Anyone reading along wants to object?
Peter Xu Feb. 24, 2020, 5:36 p.m. UTC | #7
On Mon, Feb 24, 2020 at 11:57:03AM +0100, David Hildenbrand wrote:
> On 24.02.20 11:50, David Hildenbrand wrote:
> > On 19.02.20 23:46, Peter Xu wrote:
> >> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
> >>> Factor it out and add a comment.
> >>>
> >>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
> >>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> >>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> >>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> >>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> >>> Cc: Greg Kurz <groug@kaod.org>
> >>> Cc: Eduardo Habkost <ehabkost@redhat.com>
> >>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >>> Cc: Igor Mammedov <imammedo@redhat.com>
> >>> Signed-off-by: David Hildenbrand <david@redhat.com>
> >>> ---
> >>>  util/mmap-alloc.c | 21 ++++++++++++---------
> >>>  1 file changed, 12 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> >>> index 27dcccd8ec..82f02a2cec 100644
> >>> --- a/util/mmap-alloc.c
> >>> +++ b/util/mmap-alloc.c
> >>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
> >>>      return qemu_real_host_page_size;
> >>>  }
> >>>  
> >>> +static inline size_t mmap_pagesize(int fd)
> >>> +{
> >>> +#if defined(__powerpc64__) && defined(__linux__)
> >>> +    /* Mappings in the same segment must share the same page size */
> >>> +    return qemu_fd_getpagesize(fd);
> >>> +#else
> >>> +    return qemu_real_host_page_size;
> >>> +#endif
> >>> +}
> >>
> >> Pure question: This will return 4K even for huge pages on x86, is this
> >> what we want?
> > 
> > (was asking myself the same question) I *think* it's intended. It's
> > mainly only used to allocate one additional guard page. The callers of
> > qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
> > huge pages).
> > 
> > Of course, a 4k guard page is sufficient - unless we can't use that
> > (special case for ppc64 here).
> > 
> > Thanks!
> > 
> 
> We could rename the function to mmap_guard_pagesize(), thoughts?

Yeh this looks better.

Out of topic: Actually I'm still confused on why we'd need to do so
much to just leave an PROT_NONE page after the buffer.  If the
hypervisor is buggy, it can logically write to anywhere after all.
It's not a stack structure but it can be any guest RAM so I'm not sure
overflow detection really matters that much...

Thanks,
David Hildenbrand Feb. 24, 2020, 6:37 p.m. UTC | #8
On 24.02.20 18:36, Peter Xu wrote:
> On Mon, Feb 24, 2020 at 11:57:03AM +0100, David Hildenbrand wrote:
>> On 24.02.20 11:50, David Hildenbrand wrote:
>>> On 19.02.20 23:46, Peter Xu wrote:
>>>> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
>>>>> Factor it out and add a comment.
>>>>>
>>>>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
>>>>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>>>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>>>>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>> Cc: Greg Kurz <groug@kaod.org>
>>>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>>>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>>>>> Cc: Igor Mammedov <imammedo@redhat.com>
>>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>>> ---
>>>>>  util/mmap-alloc.c | 21 ++++++++++++---------
>>>>>  1 file changed, 12 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>>>> index 27dcccd8ec..82f02a2cec 100644
>>>>> --- a/util/mmap-alloc.c
>>>>> +++ b/util/mmap-alloc.c
>>>>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>>>>>      return qemu_real_host_page_size;
>>>>>  }
>>>>>  
>>>>> +static inline size_t mmap_pagesize(int fd)
>>>>> +{
>>>>> +#if defined(__powerpc64__) && defined(__linux__)
>>>>> +    /* Mappings in the same segment must share the same page size */
>>>>> +    return qemu_fd_getpagesize(fd);
>>>>> +#else
>>>>> +    return qemu_real_host_page_size;
>>>>> +#endif
>>>>> +}
>>>>
>>>> Pure question: This will return 4K even for huge pages on x86, is this
>>>> what we want?
>>>
>>> (was asking myself the same question) I *think* it's intended. It's
>>> mainly only used to allocate one additional guard page. The callers of
>>> qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
>>> huge pages).
>>>
>>> Of course, a 4k guard page is sufficient - unless we can't use that
>>> (special case for ppc64 here).
>>>
>>> Thanks!
>>>
>>
>> We could rename the function to mmap_guard_pagesize(), thoughts?
> 
> Yeh this looks better.
> 
> Out of topic: Actually I'm still confused on why we'd need to do so
> much to just leave an PROT_NONE page after the buffer.  If the
> hypervisor is buggy, it can logically write to anywhere after all.
> It's not a stack structure but it can be any guest RAM so I'm not sure
> overflow detection really matters that much...

The comment in the file says

"Leave a single PROT_NONE page allocated after the RAM block, to serve
as a guard page guarding against potential buffer overflows."

So it's really just a safety net.

> 
> Thanks,
>
David Hildenbrand Feb. 26, 2020, 5:36 p.m. UTC | #9
On 24.02.20 15:16, Murilo Opsfelder Araújo wrote:
> On Monday, February 24, 2020 7:57:03 AM -03 David Hildenbrand wrote:
>> On 24.02.20 11:50, David Hildenbrand wrote:
>>> On 19.02.20 23:46, Peter Xu wrote:
>>>> On Wed, Feb 12, 2020 at 02:42:46PM +0100, David Hildenbrand wrote:
>>>>> Factor it out and add a comment.
>>>>>
>>>>> Reviewed-by: Igor Kotrasinski <i.kotrasinsk@partner.samsung.com>
>>>>> Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>>>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>>>>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>>>>> Cc: Greg Kurz <groug@kaod.org>
>>>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>>>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>>>>> Cc: Igor Mammedov <imammedo@redhat.com>
>>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>>> ---
>>>>>
>>>>>  util/mmap-alloc.c | 21 ++++++++++++---------
>>>>>  1 file changed, 12 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>>>> index 27dcccd8ec..82f02a2cec 100644
>>>>> --- a/util/mmap-alloc.c
>>>>> +++ b/util/mmap-alloc.c
>>>>> @@ -82,17 +82,27 @@ size_t qemu_mempath_getpagesize(const char
>>>>> *mem_path)
>>>>>
>>>>>      return qemu_real_host_page_size;
>>>>>
>>>>>  }
>>>>>
>>>>> +static inline size_t mmap_pagesize(int fd)
>>>>> +{
>>>>> +#if defined(__powerpc64__) && defined(__linux__)
>>>>> +    /* Mappings in the same segment must share the same page size */
>>>>> +    return qemu_fd_getpagesize(fd);
>>>>> +#else
>>>>> +    return qemu_real_host_page_size;
>>>>> +#endif
>>>>> +}
>>>>
>>>> Pure question: This will return 4K even for huge pages on x86, is this
>>>> what we want?
>>>
>>> (was asking myself the same question) I *think* it's intended. It's
>>> mainly only used to allocate one additional guard page. The callers of
>>> qemu_ram_mmap() make sure that the size is properly aligned (e.g., to
>>> huge pages).
>>>
>>> Of course, a 4k guard page is sufficient - unless we can't use that
>>> (special case for ppc64 here).
>>>
>>> Thanks!
>>
>> We could rename the function to mmap_guard_pagesize(), thoughts?
> 
> The existing qemu_fd_getpagesize() already returns qemu_real_host_page_size for
> non-anonymous mappings (when fd == -1).  I think this new mmap_pagesize() could
> be dropped in favor of qemu_fd_getpagesize().
> 
> A side effect of this change would be guard page using a bit more memory for
> non-anonymous mapping.  Could that be a problem?
> 
> What do you think?

So, I'll stick to mmap_guard_pagesize() for now, but use the actual page
size (via qemu_fd_getpagesize()) in the new size asserts.

Thanks!
diff mbox series

Patch

diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 27dcccd8ec..82f02a2cec 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -82,17 +82,27 @@  size_t qemu_mempath_getpagesize(const char *mem_path)
     return qemu_real_host_page_size;
 }
 
+static inline size_t mmap_pagesize(int fd)
+{
+#if defined(__powerpc64__) && defined(__linux__)
+    /* Mappings in the same segment must share the same page size */
+    return qemu_fd_getpagesize(fd);
+#else
+    return qemu_real_host_page_size;
+#endif
+}
+
 void *qemu_ram_mmap(int fd,
                     size_t size,
                     size_t align,
                     bool shared,
                     bool is_pmem)
 {
+    const size_t pagesize = mmap_pagesize(fd);
     int flags;
     int map_sync_flags = 0;
     int guardfd;
     size_t offset;
-    size_t pagesize;
     size_t total;
     void *guardptr;
     void *ptr;
@@ -113,7 +123,6 @@  void *qemu_ram_mmap(int fd,
      * anonymous memory is OK.
      */
     flags = MAP_PRIVATE;
-    pagesize = qemu_fd_getpagesize(fd);
     if (fd == -1 || pagesize == qemu_real_host_page_size) {
         guardfd = -1;
         flags |= MAP_ANONYMOUS;
@@ -123,7 +132,6 @@  void *qemu_ram_mmap(int fd,
     }
 #else
     guardfd = -1;
-    pagesize = qemu_real_host_page_size;
     flags = MAP_PRIVATE | MAP_ANONYMOUS;
 #endif
 
@@ -198,15 +206,10 @@  void *qemu_ram_mmap(int fd,
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size)
 {
-    size_t pagesize;
+    const size_t pagesize = mmap_pagesize(fd);
 
     if (ptr) {
         /* Unmap both the RAM block and the guard page */
-#if defined(__powerpc64__) && defined(__linux__)
-        pagesize = qemu_fd_getpagesize(fd);
-#else
-        pagesize = qemu_real_host_page_size;
-#endif
         munmap(ptr, size + pagesize);
     }
 }