diff mbox series

[v2,1/1] system/physmem: take into account fd_offset for file fallocate

Message ID 20250121225426.3043160-2-william.roche@oracle.com (mailing list archive)
State New
Headers show
Series fallocate missing fd_offset | expand

Commit Message

William Roche Jan. 21, 2025, 10:54 p.m. UTC
From: William Roche <william.roche@oracle.com>

Punching a hole in a file with fallocate needs to take into account the
fd_offset value for a correct file location.
But guest_memfd internal use doesn't currently consider fd_offset.

Fixes: 4b870dc4d0c0 ("hostmem-file: add offset option")

Signed-off-by: William Roche <william.roche@oracle.com>
---
 system/physmem.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

Comments

David Hildenbrand Jan. 22, 2025, 8:01 a.m. UTC | #1
On 21.01.25 23:54, William Roche wrote:
> From: William Roche <william.roche@oracle.com>
> 
> Punching a hole in a file with fallocate needs to take into account the
> fd_offset value for a correct file location.
> But guest_memfd internal use doesn't currently consider fd_offset.
> 
> Fixes: 4b870dc4d0c0 ("hostmem-file: add offset option")
> 
> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
>   system/physmem.c | 8 +++++---
>   1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/system/physmem.c b/system/physmem.c
> index c76503aea8..7e4da79311 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -3655,6 +3655,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
>           need_madvise = (rb->page_size == qemu_real_host_page_size());
>           need_fallocate = rb->fd != -1;
>           if (need_fallocate) {
> +            uint64_t file_offset = start + rb->fd_offset;

Taking another closer look ...

Could likely be "off_t".

>               /* For a file, this causes the area of the file to be zero'd
>                * if read, and for hugetlbfs also causes it to be unmapped
>                * so a userfault will trigger.
> @@ -3689,18 +3690,18 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
>               }
>   
>               ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> -                            start, length);
> +                            file_offset, length);
>               if (ret) {
>                   ret = -errno;
>                   error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
> -                             __func__, rb->idstr, start, length, ret);
> +                             __func__, rb->idstr, file_offset, length, ret);
>                   goto err;
>               }
>   #else
>               ret = -ENOSYS;
>               error_report("%s: fallocate not available/file"
>                            "%s:%" PRIx64 " +%zx (%d)",
> -                         __func__, rb->idstr, start, length, ret);
> +                         __func__, rb->idstr, file_offset, length, ret);
>               goto err;
>   #endif

Thinking again, both error_report() should likely not have the offset 
replaced?

We are printing essentially the parameters to ram_block_discard_range() 
-- range in the ramblock -- just like in the "Failed to discard range" 
message.

So maybe just leave it as is, or print the file offset additionally? 
(which might only make sense in the "Failed to fallocate" case).


Thanks!
William Roche Jan. 22, 2025, 7:39 p.m. UTC | #2
On 1/22/25 09:01, David Hildenbrand wrote:
> On 21.01.25 23:54, William Roche wrote:
>> From: William Roche <william.roche@oracle.com>
>> [...]
>> --- a/system/physmem.c
>> +++ b/system/physmem.c
>> @@ -3655,6 +3655,7 @@ int ram_block_discard_range(RAMBlock *rb, 
>> uint64_t start, size_t length)
>>           need_madvise = (rb->page_size == qemu_real_host_page_size());
>>           need_fallocate = rb->fd != -1;
>>           if (need_fallocate) {
>> +            uint64_t file_offset = start + rb->fd_offset;
> 
> Taking another closer look ...
> 
> Could likely be "off_t".

Right.


>>               /* For a file, this causes the area of the file to be 
>> zero'd
>>                * if read, and for hugetlbfs also causes it to be unmapped
>>                * so a userfault will trigger.
>> @@ -3689,18 +3690,18 @@ int ram_block_discard_range(RAMBlock *rb, 
>> uint64_t start, size_t length)
>>               }
>>               ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>> -                            start, length);
>> +                            file_offset, length);
>>               if (ret) {
>>                   ret = -errno;
>>                   error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
>> -                             __func__, rb->idstr, start, length, ret);
>> +                             __func__, rb->idstr, file_offset, length, ret);
>>                   goto err;
>>               }
>>   #else
>>               ret = -ENOSYS;
>>               error_report("%s: fallocate not available/file"
>>                            "%s:%" PRIx64 " +%zx (%d)",
>> -                         __func__, rb->idstr, start, length, ret);
>> +                         __func__, rb->idstr, file_offset, length, ret);
>>               goto err;
>>   #endif
> 
> Thinking again, both error_report() should likely not have the offset 
> replaced?
> 
> We are printing essentially the parameters to ram_block_discard_range() 
> -- range in the ramblock -- just like in the "Failed to discard range" 
> message.
> 
> So maybe just leave it as is, or print the file offset additionally? 
> (which might only make sense in the "Failed to fallocate" case).

I understand that the start value may be clearer to read than the global 
file_offset. So I'm slightly modifying the error message to show 
<start>+<fd_offset> (without a space), which would usually be <start>+0.

For example:
  ram_block_discard_range: Failed to fallocate ram-node1:f2db000+0 +1000 
(-5)

instead of:
  ram_block_discard_range: Failed to fallocate ram-node1:f2db000 +1000 (-5)

The length notation isn't changing, coming afterwards with a space -- so 
that it continues to match all the other similar range error messages in 
system/physmem.c.

I also align the "fallocate not available/file" message to show the 
extra +<fd_offset> after the <start>.


I'm sending a v3 version now.

William.
diff mbox series

Patch

diff --git a/system/physmem.c b/system/physmem.c
index c76503aea8..7e4da79311 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3655,6 +3655,7 @@  int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
         need_madvise = (rb->page_size == qemu_real_host_page_size());
         need_fallocate = rb->fd != -1;
         if (need_fallocate) {
+            uint64_t file_offset = start + rb->fd_offset;
             /* For a file, this causes the area of the file to be zero'd
              * if read, and for hugetlbfs also causes it to be unmapped
              * so a userfault will trigger.
@@ -3689,18 +3690,18 @@  int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
             }
 
             ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                            start, length);
+                            file_offset, length);
             if (ret) {
                 ret = -errno;
                 error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
-                             __func__, rb->idstr, start, length, ret);
+                             __func__, rb->idstr, file_offset, length, ret);
                 goto err;
             }
 #else
             ret = -ENOSYS;
             error_report("%s: fallocate not available/file"
                          "%s:%" PRIx64 " +%zx (%d)",
-                         __func__, rb->idstr, start, length, ret);
+                         __func__, rb->idstr, file_offset, length, ret);
             goto err;
 #endif
         }
@@ -3747,6 +3748,7 @@  int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
     int ret = -1;
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    /* ignore fd_offset with guest_memfd */
     ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                     start, length);