diff mbox series

[12/20] fuse: Introduce setupmapping/removemapping commands

Message ID 20200304165845.3081-13-vgoyal@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs: Add DAX support | expand

Commit Message

Vivek Goyal March 4, 2020, 4:58 p.m. UTC
Introduce two new fuse commands to setup/remove memory mappings. This
will be used to setup/tear down file mapping in dax window.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
---
 include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

Comments

Miklos Szeredi March 10, 2020, 7:49 p.m. UTC | #1
On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> Introduce two new fuse commands to setup/remove memory mappings. This
> will be used to setup/tear down file mapping in dax window.
>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> ---
>  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 37 insertions(+)
>
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 5b85819e045f..62633555d547 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
>         uint64_t        flags;
>  };
>
> +#define FUSE_SETUPMAPPING_ENTRIES 8
> +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> +struct fuse_setupmapping_in {
> +       /* An already open handle */
> +       uint64_t        fh;
> +       /* Offset into the file to start the mapping */
> +       uint64_t        foffset;
> +       /* Length of mapping required */
> +       uint64_t        len;
> +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> +       uint64_t        flags;
> +       /* Offset in Memory Window */
> +       uint64_t        moffset;
> +};
> +
> +struct fuse_setupmapping_out {
> +       /* Offsets into the cache of mappings */
> +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> +        /* Lengths of each mapping */
> +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> +};

fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.

Thanks,
Miklos
Vivek Goyal March 10, 2020, 8:33 p.m. UTC | #2
On Tue, Mar 10, 2020 at 08:49:49PM +0100, Miklos Szeredi wrote:
> On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > Introduce two new fuse commands to setup/remove memory mappings. This
> > will be used to setup/tear down file mapping in dax window.
> >
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > ---
> >  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 37 insertions(+)
> >
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index 5b85819e045f..62633555d547 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
> >         uint64_t        flags;
> >  };
> >
> > +#define FUSE_SETUPMAPPING_ENTRIES 8
> > +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> > +struct fuse_setupmapping_in {
> > +       /* An already open handle */
> > +       uint64_t        fh;
> > +       /* Offset into the file to start the mapping */
> > +       uint64_t        foffset;
> > +       /* Length of mapping required */
> > +       uint64_t        len;
> > +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> > +       uint64_t        flags;
> > +       /* Offset in Memory Window */
> > +       uint64_t        moffset;
> > +};
> > +
> > +struct fuse_setupmapping_out {
> > +       /* Offsets into the cache of mappings */
> > +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> > +        /* Lengths of each mapping */
> > +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> > +};
> 
> fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.

This looks like leftover from the old code. I will get rid of it. Thanks.

Vivek
Amir Goldstein March 11, 2020, 7:03 a.m. UTC | #3
On Tue, Mar 10, 2020 at 10:34 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Tue, Mar 10, 2020 at 08:49:49PM +0100, Miklos Szeredi wrote:
> > On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > Introduce two new fuse commands to setup/remove memory mappings. This
> > > will be used to setup/tear down file mapping in dax window.
> > >
> > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > ---
> > >  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 37 insertions(+)
> > >
> > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > > index 5b85819e045f..62633555d547 100644
> > > --- a/include/uapi/linux/fuse.h
> > > +++ b/include/uapi/linux/fuse.h
> > > @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
> > >         uint64_t        flags;
> > >  };
> > >
> > > +#define FUSE_SETUPMAPPING_ENTRIES 8
> > > +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> > > +struct fuse_setupmapping_in {
> > > +       /* An already open handle */
> > > +       uint64_t        fh;
> > > +       /* Offset into the file to start the mapping */
> > > +       uint64_t        foffset;
> > > +       /* Length of mapping required */
> > > +       uint64_t        len;
> > > +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> > > +       uint64_t        flags;
> > > +       /* Offset in Memory Window */
> > > +       uint64_t        moffset;
> > > +};
> > > +
> > > +struct fuse_setupmapping_out {
> > > +       /* Offsets into the cache of mappings */
> > > +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> > > +        /* Lengths of each mapping */
> > > +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> > > +};
> >
> > fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.
>
> This looks like leftover from the old code. I will get rid of it. Thanks.
>

Hmm. I wonder if we should keep some out args for future extensions.
Maybe return the mapped size even though it is all or nothing at this
point?

I have interest in a similar FUSE mapping functionality that was prototyped
by Miklos and published here:
https://lore.kernel.org/linux-fsdevel/CAJfpegtjEoE7H8tayLaQHG9fRSBiVuaspnmPr2oQiOZXVB1+7g@mail.gmail.com/

In this prototype, a FUSE_MAP command is used by the server to map a
range of file to the kernel for io. The command in args are quite similar to
those in fuse_setupmapping_in, but since the server is on the same host,
the mapping response is {mapfd, offset, size}.

I wonder, if we decide to go forward with this prototype (and I may well decide
to drive this forward), should the new command be overloading
FUSE_SETUPMAPPING, by using new flags or should it be a new
command? In either case, I think it would be best to try and make a decision
now in order to avoid ambiguity with protocol command/flag names later on.

If we decide that those are completely different beasts and it is agreed that
the future command will be named, for example, FUSE_SETUPIOMAP
with different arguments and that this naming will not create confusion and
ambiguity with FUSE_SETUPMAPPING, then there is no actionable item at
this time.

But it is possible that there is something to gain from using the same
command(?) and same book keeping mechanism for both types of
mappings. Even server on same host could decide that it wants
to map some file regions via mmap and some via iomap.
In that case, perhaps we should make the FUSE_SETUPMAPPING
response args expressive enough to be able to express an iomap
mapping in the future and perhaps dax code should explicitly request
for FUSE_SETUPMAPPING_FLAG_DAX mapping type?

Thoughts?

Amir.
Miklos Szeredi March 11, 2020, 2:19 p.m. UTC | #4
On Wed, Mar 11, 2020 at 8:03 AM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Mar 10, 2020 at 10:34 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Tue, Mar 10, 2020 at 08:49:49PM +0100, Miklos Szeredi wrote:
> > > On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > > >
> > > > Introduce two new fuse commands to setup/remove memory mappings. This
> > > > will be used to setup/tear down file mapping in dax window.
> > > >
> > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > > ---
> > > >  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 37 insertions(+)
> > > >
> > > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > > > index 5b85819e045f..62633555d547 100644
> > > > --- a/include/uapi/linux/fuse.h
> > > > +++ b/include/uapi/linux/fuse.h
> > > > @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
> > > >         uint64_t        flags;
> > > >  };
> > > >
> > > > +#define FUSE_SETUPMAPPING_ENTRIES 8
> > > > +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> > > > +struct fuse_setupmapping_in {
> > > > +       /* An already open handle */
> > > > +       uint64_t        fh;
> > > > +       /* Offset into the file to start the mapping */
> > > > +       uint64_t        foffset;
> > > > +       /* Length of mapping required */
> > > > +       uint64_t        len;
> > > > +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> > > > +       uint64_t        flags;
> > > > +       /* Offset in Memory Window */
> > > > +       uint64_t        moffset;
> > > > +};
> > > > +
> > > > +struct fuse_setupmapping_out {
> > > > +       /* Offsets into the cache of mappings */
> > > > +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> > > > +        /* Lengths of each mapping */
> > > > +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> > > > +};
> > >
> > > fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.
> >
> > This looks like leftover from the old code. I will get rid of it. Thanks.
> >
>
> Hmm. I wonder if we should keep some out args for future extensions.
> Maybe return the mapped size even though it is all or nothing at this
> point?
>
> I have interest in a similar FUSE mapping functionality that was prototyped
> by Miklos and published here:
> https://lore.kernel.org/linux-fsdevel/CAJfpegtjEoE7H8tayLaQHG9fRSBiVuaspnmPr2oQiOZXVB1+7g@mail.gmail.com/
>
> In this prototype, a FUSE_MAP command is used by the server to map a
> range of file to the kernel for io. The command in args are quite similar to
> those in fuse_setupmapping_in, but since the server is on the same host,
> the mapping response is {mapfd, offset, size}.

Right.  So the difference is in which entity allocates the mapping.
IOW whether the {fd, offset, size} is input or output in the protocol.

I don't remember the reasons for going with the mapping being
allocated by the client, not the other way round.   Vivek?

If the allocation were to be by the server, we could share the request
type and possibly some code between the two, although the I/O
mechanism would still be different.

Thanks,
Miklos
Vivek Goyal March 11, 2020, 2:41 p.m. UTC | #5
On Wed, Mar 11, 2020 at 03:19:18PM +0100, Miklos Szeredi wrote:
> On Wed, Mar 11, 2020 at 8:03 AM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Mar 10, 2020 at 10:34 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > On Tue, Mar 10, 2020 at 08:49:49PM +0100, Miklos Szeredi wrote:
> > > > On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > > > >
> > > > > Introduce two new fuse commands to setup/remove memory mappings. This
> > > > > will be used to setup/tear down file mapping in dax window.
> > > > >
> > > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > > > ---
> > > > >  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 37 insertions(+)
> > > > >
> > > > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > > > > index 5b85819e045f..62633555d547 100644
> > > > > --- a/include/uapi/linux/fuse.h
> > > > > +++ b/include/uapi/linux/fuse.h
> > > > > @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
> > > > >         uint64_t        flags;
> > > > >  };
> > > > >
> > > > > +#define FUSE_SETUPMAPPING_ENTRIES 8
> > > > > +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> > > > > +struct fuse_setupmapping_in {
> > > > > +       /* An already open handle */
> > > > > +       uint64_t        fh;
> > > > > +       /* Offset into the file to start the mapping */
> > > > > +       uint64_t        foffset;
> > > > > +       /* Length of mapping required */
> > > > > +       uint64_t        len;
> > > > > +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> > > > > +       uint64_t        flags;
> > > > > +       /* Offset in Memory Window */
> > > > > +       uint64_t        moffset;
> > > > > +};
> > > > > +
> > > > > +struct fuse_setupmapping_out {
> > > > > +       /* Offsets into the cache of mappings */
> > > > > +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> > > > > +        /* Lengths of each mapping */
> > > > > +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> > > > > +};
> > > >
> > > > fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.
> > >
> > > This looks like leftover from the old code. I will get rid of it. Thanks.
> > >
> >
> > Hmm. I wonder if we should keep some out args for future extensions.
> > Maybe return the mapped size even though it is all or nothing at this
> > point?
> >
> > I have interest in a similar FUSE mapping functionality that was prototyped
> > by Miklos and published here:
> > https://lore.kernel.org/linux-fsdevel/CAJfpegtjEoE7H8tayLaQHG9fRSBiVuaspnmPr2oQiOZXVB1+7g@mail.gmail.com/
> >
> > In this prototype, a FUSE_MAP command is used by the server to map a
> > range of file to the kernel for io. The command in args are quite similar to
> > those in fuse_setupmapping_in, but since the server is on the same host,
> > the mapping response is {mapfd, offset, size}.
> 
> Right.  So the difference is in which entity allocates the mapping.
> IOW whether the {fd, offset, size} is input or output in the protocol.
> 
> I don't remember the reasons for going with the mapping being
> allocated by the client, not the other way round.   Vivek?

I think one of the main reasons is memory reclaim. Once all ranges in 
a cache range are allocated, we need to free a memory range which can be
reused. And client has all the logic to free up that range so that it can
be remapped and reused for a different file/offset. Server will not know
any of this. So I will think that for virtiofs, server might not be
able to decide where to map a section of file and it has to be told
explicitly by the client.

> 
> If the allocation were to be by the server, we could share the request
> type and possibly some code between the two, although the I/O
> mechanism would still be different.
> 

So input parameters of both FUSE_SETUPMAPPING and FUSE_MAP seem
similar (except the moffset field).  Given output of FUSE_MAP reqeust
is very different, I would think it will be easier to have it as a
separate command.

Or can it be some sort of optional output args which can differentiate
between two types of requests. 

/me personally finds it simpler to have separate command instead of
overloading FUSE_SETUPMAPPING. But its your call. :-) 

Vivek
Miklos Szeredi March 11, 2020, 3:12 p.m. UTC | #6
On Wed, Mar 11, 2020 at 3:41 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Wed, Mar 11, 2020 at 03:19:18PM +0100, Miklos Szeredi wrote:
> > On Wed, Mar 11, 2020 at 8:03 AM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Tue, Mar 10, 2020 at 10:34 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > > >
> > > > On Tue, Mar 10, 2020 at 08:49:49PM +0100, Miklos Szeredi wrote:
> > > > > On Wed, Mar 4, 2020 at 5:59 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > > > > >
> > > > > > Introduce two new fuse commands to setup/remove memory mappings. This
> > > > > > will be used to setup/tear down file mapping in dax window.
> > > > > >
> > > > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > > > > ---
> > > > > >  include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 37 insertions(+)
> > > > > >
> > > > > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > > > > > index 5b85819e045f..62633555d547 100644
> > > > > > --- a/include/uapi/linux/fuse.h
> > > > > > +++ b/include/uapi/linux/fuse.h
> > > > > > @@ -894,4 +894,41 @@ struct fuse_copy_file_range_in {
> > > > > >         uint64_t        flags;
> > > > > >  };
> > > > > >
> > > > > > +#define FUSE_SETUPMAPPING_ENTRIES 8
> > > > > > +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
> > > > > > +struct fuse_setupmapping_in {
> > > > > > +       /* An already open handle */
> > > > > > +       uint64_t        fh;
> > > > > > +       /* Offset into the file to start the mapping */
> > > > > > +       uint64_t        foffset;
> > > > > > +       /* Length of mapping required */
> > > > > > +       uint64_t        len;
> > > > > > +       /* Flags, FUSE_SETUPMAPPING_FLAG_* */
> > > > > > +       uint64_t        flags;
> > > > > > +       /* Offset in Memory Window */
> > > > > > +       uint64_t        moffset;
> > > > > > +};
> > > > > > +
> > > > > > +struct fuse_setupmapping_out {
> > > > > > +       /* Offsets into the cache of mappings */
> > > > > > +       uint64_t        coffset[FUSE_SETUPMAPPING_ENTRIES];
> > > > > > +        /* Lengths of each mapping */
> > > > > > +        uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
> > > > > > +};
> > > > >
> > > > > fuse_setupmapping_out together with FUSE_SETUPMAPPING_ENTRIES seem to be unused.
> > > >
> > > > This looks like leftover from the old code. I will get rid of it. Thanks.
> > > >
> > >
> > > Hmm. I wonder if we should keep some out args for future extensions.
> > > Maybe return the mapped size even though it is all or nothing at this
> > > point?
> > >
> > > I have interest in a similar FUSE mapping functionality that was prototyped
> > > by Miklos and published here:
> > > https://lore.kernel.org/linux-fsdevel/CAJfpegtjEoE7H8tayLaQHG9fRSBiVuaspnmPr2oQiOZXVB1+7g@mail.gmail.com/
> > >
> > > In this prototype, a FUSE_MAP command is used by the server to map a
> > > range of file to the kernel for io. The command in args are quite similar to
> > > those in fuse_setupmapping_in, but since the server is on the same host,
> > > the mapping response is {mapfd, offset, size}.
> >
> > Right.  So the difference is in which entity allocates the mapping.
> > IOW whether the {fd, offset, size} is input or output in the protocol.
> >
> > I don't remember the reasons for going with the mapping being
> > allocated by the client, not the other way round.   Vivek?
>
> I think one of the main reasons is memory reclaim. Once all ranges in
> a cache range are allocated, we need to free a memory range which can be
> reused. And client has all the logic to free up that range so that it can
> be remapped and reused for a different file/offset. Server will not know
> any of this. So I will think that for virtiofs, server might not be
> able to decide where to map a section of file and it has to be told
> explicitly by the client.

Okay.

> >
> > If the allocation were to be by the server, we could share the request
> > type and possibly some code between the two, although the I/O
> > mechanism would still be different.
> >
>
> So input parameters of both FUSE_SETUPMAPPING and FUSE_MAP seem
> similar (except the moffset field).  Given output of FUSE_MAP reqeust
> is very different, I would think it will be easier to have it as a
> separate command.
>
> Or can it be some sort of optional output args which can differentiate
> between two types of requests.
>
> /me personally finds it simpler to have separate command instead of
> overloading FUSE_SETUPMAPPING. But its your call. :-)

I too prefer a separate request type.

Thanks,
Miklos
diff mbox series

Patch

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5b85819e045f..62633555d547 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -894,4 +894,41 @@  struct fuse_copy_file_range_in {
 	uint64_t	flags;
 };
 
+#define FUSE_SETUPMAPPING_ENTRIES 8
+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+struct fuse_setupmapping_in {
+	/* An already open handle */
+	uint64_t	fh;
+	/* Offset into the file to start the mapping */
+	uint64_t	foffset;
+	/* Length of mapping required */
+	uint64_t	len;
+	/* Flags, FUSE_SETUPMAPPING_FLAG_* */
+	uint64_t	flags;
+	/* Offset in Memory Window */
+	uint64_t	moffset;
+};
+
+struct fuse_setupmapping_out {
+	/* Offsets into the cache of mappings */
+	uint64_t	coffset[FUSE_SETUPMAPPING_ENTRIES];
+        /* Lengths of each mapping */
+        uint64_t	len[FUSE_SETUPMAPPING_ENTRIES];
+};
+
+struct fuse_removemapping_in {
+	/* number of fuse_removemapping_one follows */
+	uint32_t        count;
+};
+
+struct fuse_removemapping_one {
+	/* Offset into the dax window start the unmapping */
+	uint64_t        moffset;
+        /* Length of mapping required */
+        uint64_t	len;
+};
+
+#define FUSE_REMOVEMAPPING_MAX_ENTRY   \
+		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+
 #endif /* _LINUX_FUSE_H */