diff mbox series

[v1,03/15] net: generalise net_iov chunk owners

Message ID 20241007221603.1703699-4-dw@davidwei.uk (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series io_uring zero copy rx | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

David Wei Oct. 7, 2024, 10:15 p.m. UTC
From: Pavel Begunkov <asml.silence@gmail.com>

Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
which serves as a useful abstraction to share data and provide a
context. However, it's too devmem specific, and we want to reuse it for
other memory providers, and for that we need to decouple net_iov from
devmem. Make net_iov to point to a new base structure called
net_iov_area, which dmabuf_genpool_chunk_owner extends.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Wei <dw@davidwei.uk>
---
 include/net/netmem.h | 21 ++++++++++++++++++++-
 net/core/devmem.c    | 25 +++++++++++++------------
 net/core/devmem.h    | 25 +++++++++----------------
 3 files changed, 42 insertions(+), 29 deletions(-)

Comments

Stanislav Fomichev Oct. 8, 2024, 3:46 p.m. UTC | #1
On 10/07, David Wei wrote:
> From: Pavel Begunkov <asml.silence@gmail.com>
> 
> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
> which serves as a useful abstraction to share data and provide a
> context. However, it's too devmem specific, and we want to reuse it for
> other memory providers, and for that we need to decouple net_iov from
> devmem. Make net_iov to point to a new base structure called
> net_iov_area, which dmabuf_genpool_chunk_owner extends.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> Signed-off-by: David Wei <dw@davidwei.uk>
> ---
>  include/net/netmem.h | 21 ++++++++++++++++++++-
>  net/core/devmem.c    | 25 +++++++++++++------------
>  net/core/devmem.h    | 25 +++++++++----------------
>  3 files changed, 42 insertions(+), 29 deletions(-)
> 
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 8a6e20be4b9d..3795ded30d2c 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -24,11 +24,20 @@ struct net_iov {
>  	unsigned long __unused_padding;
>  	unsigned long pp_magic;
>  	struct page_pool *pp;
> -	struct dmabuf_genpool_chunk_owner *owner;
> +	struct net_iov_area *owner;

Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
to net_iov_area to generalize) with the fields that you don't need
set to 0/NULL? container_of makes everything harder to follow :-(
Pavel Begunkov Oct. 8, 2024, 4:34 p.m. UTC | #2
On 10/8/24 16:46, Stanislav Fomichev wrote:
> On 10/07, David Wei wrote:
>> From: Pavel Begunkov <asml.silence@gmail.com>
>>
>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>> which serves as a useful abstraction to share data and provide a
>> context. However, it's too devmem specific, and we want to reuse it for
>> other memory providers, and for that we need to decouple net_iov from
>> devmem. Make net_iov to point to a new base structure called
>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>>   include/net/netmem.h | 21 ++++++++++++++++++++-
>>   net/core/devmem.c    | 25 +++++++++++++------------
>>   net/core/devmem.h    | 25 +++++++++----------------
>>   3 files changed, 42 insertions(+), 29 deletions(-)
>>
>> diff --git a/include/net/netmem.h b/include/net/netmem.h
>> index 8a6e20be4b9d..3795ded30d2c 100644
>> --- a/include/net/netmem.h
>> +++ b/include/net/netmem.h
>> @@ -24,11 +24,20 @@ struct net_iov {
>>   	unsigned long __unused_padding;
>>   	unsigned long pp_magic;
>>   	struct page_pool *pp;
>> -	struct dmabuf_genpool_chunk_owner *owner;
>> +	struct net_iov_area *owner;
> 
> Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
> to net_iov_area to generalize) with the fields that you don't need
> set to 0/NULL? container_of makes everything harder to follow :-(

It can be that, but then io_uring would have a (null) pointer to
struct net_devmem_dmabuf_binding it knows nothing about and other
fields devmem might add in the future. Also, it reduces the
temptation for the common code to make assumptions about the origin
of the area / pp memory provider. IOW, I think it's cleaner
when separated like in this patch.
Stanislav Fomichev Oct. 9, 2024, 4:28 p.m. UTC | #3
On 10/08, Pavel Begunkov wrote:
> On 10/8/24 16:46, Stanislav Fomichev wrote:
> > On 10/07, David Wei wrote:
> > > From: Pavel Begunkov <asml.silence@gmail.com>
> > > 
> > > Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
> > > which serves as a useful abstraction to share data and provide a
> > > context. However, it's too devmem specific, and we want to reuse it for
> > > other memory providers, and for that we need to decouple net_iov from
> > > devmem. Make net_iov to point to a new base structure called
> > > net_iov_area, which dmabuf_genpool_chunk_owner extends.
> > > 
> > > Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> > > Signed-off-by: David Wei <dw@davidwei.uk>
> > > ---
> > >   include/net/netmem.h | 21 ++++++++++++++++++++-
> > >   net/core/devmem.c    | 25 +++++++++++++------------
> > >   net/core/devmem.h    | 25 +++++++++----------------
> > >   3 files changed, 42 insertions(+), 29 deletions(-)
> > > 
> > > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > > index 8a6e20be4b9d..3795ded30d2c 100644
> > > --- a/include/net/netmem.h
> > > +++ b/include/net/netmem.h
> > > @@ -24,11 +24,20 @@ struct net_iov {
> > >   	unsigned long __unused_padding;
> > >   	unsigned long pp_magic;
> > >   	struct page_pool *pp;
> > > -	struct dmabuf_genpool_chunk_owner *owner;
> > > +	struct net_iov_area *owner;
> > 
> > Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
> > to net_iov_area to generalize) with the fields that you don't need
> > set to 0/NULL? container_of makes everything harder to follow :-(
> 
> It can be that, but then io_uring would have a (null) pointer to
> struct net_devmem_dmabuf_binding it knows nothing about and other
> fields devmem might add in the future. Also, it reduces the
> temptation for the common code to make assumptions about the origin
> of the area / pp memory provider. IOW, I think it's cleaner
> when separated like in this patch.

Ack, let's see whether other people find any issues with this approach.
For me, it makes the devmem parts harder to read, so my preference
is on dropping this patch and keeping owner=null on your side.
Mina Almasry Oct. 9, 2024, 8:44 p.m. UTC | #4
On Mon, Oct 7, 2024 at 3:16 PM David Wei <dw@davidwei.uk> wrote:
>
> From: Pavel Begunkov <asml.silence@gmail.com>
>
> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
> which serves as a useful abstraction to share data and provide a
> context. However, it's too devmem specific, and we want to reuse it for
> other memory providers, and for that we need to decouple net_iov from
> devmem. Make net_iov to point to a new base structure called
> net_iov_area, which dmabuf_genpool_chunk_owner extends.


Similar feeling to Stan initially. I also thought you'd reuse
dmabuf_genpool_chunk_owner. Seems like you're doing that but also
renaming it to net_iov_area almost, which seems fine.

I guess, with this patch, there is no way to tell, given just a
net_iov whether it's dmabuf or something else, right? I wonder if
that's an issue. In my mind when an skb is in tcp_recvmsg() we need to
make sure it's a dmabuf net_iov specifically to call
tcp_recvmsg_dmabuf for example. I'll look deeper here.

...

>
>  static inline struct dmabuf_genpool_chunk_owner *
> -net_iov_owner(const struct net_iov *niov)
> +net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
>  {
> -       return niov->owner;
> -}
> +       struct net_iov_area *owner = net_iov_owner(niov);
>
> -static inline unsigned int net_iov_idx(const struct net_iov *niov)
> -{
> -       return niov - net_iov_owner(niov)->niovs;
> +       return container_of(owner, struct dmabuf_genpool_chunk_owner, area);

Couldn't this end up returning garbage if the net_iov is not actually
a dmabuf one? Is that handled somewhere in a later patch that I
missed?
Pavel Begunkov Oct. 9, 2024, 10:13 p.m. UTC | #5
On 10/9/24 21:44, Mina Almasry wrote:
> On Mon, Oct 7, 2024 at 3:16 PM David Wei <dw@davidwei.uk> wrote:
>>
>> From: Pavel Begunkov <asml.silence@gmail.com>
>>
>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>> which serves as a useful abstraction to share data and provide a
>> context. However, it's too devmem specific, and we want to reuse it for
>> other memory providers, and for that we need to decouple net_iov from
>> devmem. Make net_iov to point to a new base structure called
>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
> 
> 
> Similar feeling to Stan initially. I also thought you'd reuse
> dmabuf_genpool_chunk_owner. Seems like you're doing that but also
> renaming it to net_iov_area almost, which seems fine.
> 
> I guess, with this patch, there is no way to tell, given just a
> net_iov whether it's dmabuf or something else, right? I wonder if

By intention there is no good/clear way to tell if it's a dmabuf
or page backed net_iov in the generic path, but you can easily
check if it's devmem or io_uring by comparing page pool's ops.
net_iov::pp should always be available when it's in the net stack.
5/15 does exactly that in the devmem tcp portion of tcp.c.

> that's an issue. In my mind when an skb is in tcp_recvmsg() we need to
> make sure it's a dmabuf net_iov specifically to call
> tcp_recvmsg_dmabuf for example. I'll look deeper here.

Mentioned above, patch 5/15 handles that.

>>   static inline struct dmabuf_genpool_chunk_owner *
>> -net_iov_owner(const struct net_iov *niov)
>> +net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
>>   {
>> -       return niov->owner;
>> -}
>> +       struct net_iov_area *owner = net_iov_owner(niov);
>>
>> -static inline unsigned int net_iov_idx(const struct net_iov *niov)
>> -{
>> -       return niov - net_iov_owner(niov)->niovs;
>> +       return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
> 
> Couldn't this end up returning garbage if the net_iov is not actually
> a dmabuf one? Is that handled somewhere in a later patch that I
> missed?

Surely it will if someone manages to use it with non-devmem net_iovs,
which is why I renamed it to "devmem*".
Pavel Begunkov Oct. 9, 2024, 10:19 p.m. UTC | #6
On 10/9/24 21:44, Mina Almasry wrote:
> On Mon, Oct 7, 2024 at 3:16 PM David Wei <dw@davidwei.uk> wrote:
>>
>> From: Pavel Begunkov <asml.silence@gmail.com>
>>
>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>> which serves as a useful abstraction to share data and provide a
>> context. However, it's too devmem specific, and we want to reuse it for
>> other memory providers, and for that we need to decouple net_iov from
>> devmem. Make net_iov to point to a new base structure called
>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
> 
> 
> Similar feeling to Stan initially. I also thought you'd reuse
> dmabuf_genpool_chunk_owner. Seems like you're doing that but also
> renaming it to net_iov_area almost, which seems fine.

I did give it a thought long time ago, was thinking to have
chunk_owner / area to store a pointer to some kind of context,
i.e. binding for devmem, but then you need to store void* instead
of well typed net_devmem_dmabuf_binding / etc., while it'd still
need to cast the owner / area, e.g. io_uring needs a fair share of
additional fields.
David Wei Oct. 11, 2024, 6:44 p.m. UTC | #7
On 2024-10-09 09:28, Stanislav Fomichev wrote:
> On 10/08, Pavel Begunkov wrote:
>> On 10/8/24 16:46, Stanislav Fomichev wrote:
>>> On 10/07, David Wei wrote:
>>>> From: Pavel Begunkov <asml.silence@gmail.com>
>>>>
>>>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>>>> which serves as a useful abstraction to share data and provide a
>>>> context. However, it's too devmem specific, and we want to reuse it for
>>>> other memory providers, and for that we need to decouple net_iov from
>>>> devmem. Make net_iov to point to a new base structure called
>>>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
>>>>
>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>> ---
>>>>   include/net/netmem.h | 21 ++++++++++++++++++++-
>>>>   net/core/devmem.c    | 25 +++++++++++++------------
>>>>   net/core/devmem.h    | 25 +++++++++----------------
>>>>   3 files changed, 42 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/include/net/netmem.h b/include/net/netmem.h
>>>> index 8a6e20be4b9d..3795ded30d2c 100644
>>>> --- a/include/net/netmem.h
>>>> +++ b/include/net/netmem.h
>>>> @@ -24,11 +24,20 @@ struct net_iov {
>>>>   	unsigned long __unused_padding;
>>>>   	unsigned long pp_magic;
>>>>   	struct page_pool *pp;
>>>> -	struct dmabuf_genpool_chunk_owner *owner;
>>>> +	struct net_iov_area *owner;
>>>
>>> Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
>>> to net_iov_area to generalize) with the fields that you don't need
>>> set to 0/NULL? container_of makes everything harder to follow :-(
>>
>> It can be that, but then io_uring would have a (null) pointer to
>> struct net_devmem_dmabuf_binding it knows nothing about and other
>> fields devmem might add in the future. Also, it reduces the
>> temptation for the common code to make assumptions about the origin
>> of the area / pp memory provider. IOW, I think it's cleaner
>> when separated like in this patch.
> 
> Ack, let's see whether other people find any issues with this approach.
> For me, it makes the devmem parts harder to read, so my preference
> is on dropping this patch and keeping owner=null on your side.

I don't mind at this point which approach to take right now. I would
prefer keeping dmabuf_genpool_chunk_owner today even if it results in a
nullptr in io_uring's case. Once there are more memory providers in the
future, I think it'll be clearer what sort of abstraction we might need
here.
Pavel Begunkov Oct. 11, 2024, 10:02 p.m. UTC | #8
On 10/11/24 19:44, David Wei wrote:
> On 2024-10-09 09:28, Stanislav Fomichev wrote:
>> On 10/08, Pavel Begunkov wrote:
>>> On 10/8/24 16:46, Stanislav Fomichev wrote:
>>>> On 10/07, David Wei wrote:
>>>>> From: Pavel Begunkov <asml.silence@gmail.com>
>>>>>
>>>>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>>>>> which serves as a useful abstraction to share data and provide a
>>>>> context. However, it's too devmem specific, and we want to reuse it for
>>>>> other memory providers, and for that we need to decouple net_iov from
>>>>> devmem. Make net_iov to point to a new base structure called
>>>>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
>>>>>
>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>>> ---
>>>>>    include/net/netmem.h | 21 ++++++++++++++++++++-
>>>>>    net/core/devmem.c    | 25 +++++++++++++------------
>>>>>    net/core/devmem.h    | 25 +++++++++----------------
>>>>>    3 files changed, 42 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/include/net/netmem.h b/include/net/netmem.h
>>>>> index 8a6e20be4b9d..3795ded30d2c 100644
>>>>> --- a/include/net/netmem.h
>>>>> +++ b/include/net/netmem.h
>>>>> @@ -24,11 +24,20 @@ struct net_iov {
>>>>>    	unsigned long __unused_padding;
>>>>>    	unsigned long pp_magic;
>>>>>    	struct page_pool *pp;
>>>>> -	struct dmabuf_genpool_chunk_owner *owner;
>>>>> +	struct net_iov_area *owner;
>>>>
>>>> Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
>>>> to net_iov_area to generalize) with the fields that you don't need
>>>> set to 0/NULL? container_of makes everything harder to follow :-(
>>>
>>> It can be that, but then io_uring would have a (null) pointer to
>>> struct net_devmem_dmabuf_binding it knows nothing about and other
>>> fields devmem might add in the future. Also, it reduces the
>>> temptation for the common code to make assumptions about the origin
>>> of the area / pp memory provider. IOW, I think it's cleaner
>>> when separated like in this patch.
>>
>> Ack, let's see whether other people find any issues with this approach.
>> For me, it makes the devmem parts harder to read, so my preference
>> is on dropping this patch and keeping owner=null on your side.
> 
> I don't mind at this point which approach to take right now. I would
> prefer keeping dmabuf_genpool_chunk_owner today even if it results in a
> nullptr in io_uring's case. Once there are more memory providers in the
> future, I think it'll be clearer what sort of abstraction we might need
> here.

That's the thing about abstractions, if we say that devmem is the
only first class citizen for net_iov and everything else by definition
is 2nd class that should strictly follow devmem TCP patterns, and/or
that struct dmabuf_genpool_chunk_owner is an integral part of net_iov
and should be reused by everyone, then preserving the current state
of the chunk owner is likely the right long term approach. If not, and
net_iov is actually a generic piece of infrastructure, then IMHO there
is no place for devmem sticking out of every bit single bit of it, with
structures that are devmem specific and can even be not defined without
devmem TCP enabled (fwiw, which is not an actual problem for
compilation, juts oddness).

This patch is one way to do it. The other way assumed is to
convert that binding pointer field to a type-less / void *
context / private pointer, but that seems worse. The difference
starts and the chunk owners, i.e. io_uring's area has to extend
the structure, and we'd still need to cast both that private filed
and the chunk owner / area (with container_of), + a couple more
reasons on top.
Mina Almasry Oct. 11, 2024, 10:25 p.m. UTC | #9
On Fri, Oct 11, 2024 at 3:02 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> On 10/11/24 19:44, David Wei wrote:
> > On 2024-10-09 09:28, Stanislav Fomichev wrote:
> >> On 10/08, Pavel Begunkov wrote:
> >>> On 10/8/24 16:46, Stanislav Fomichev wrote:
> >>>> On 10/07, David Wei wrote:
> >>>>> From: Pavel Begunkov <asml.silence@gmail.com>
> >>>>>
> >>>>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
> >>>>> which serves as a useful abstraction to share data and provide a
> >>>>> context. However, it's too devmem specific, and we want to reuse it for
> >>>>> other memory providers, and for that we need to decouple net_iov from
> >>>>> devmem. Make net_iov to point to a new base structure called
> >>>>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
> >>>>>
> >>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> >>>>> Signed-off-by: David Wei <dw@davidwei.uk>
> >>>>> ---
> >>>>>    include/net/netmem.h | 21 ++++++++++++++++++++-
> >>>>>    net/core/devmem.c    | 25 +++++++++++++------------
> >>>>>    net/core/devmem.h    | 25 +++++++++----------------
> >>>>>    3 files changed, 42 insertions(+), 29 deletions(-)
> >>>>>
> >>>>> diff --git a/include/net/netmem.h b/include/net/netmem.h
> >>>>> index 8a6e20be4b9d..3795ded30d2c 100644
> >>>>> --- a/include/net/netmem.h
> >>>>> +++ b/include/net/netmem.h
> >>>>> @@ -24,11 +24,20 @@ struct net_iov {
> >>>>>           unsigned long __unused_padding;
> >>>>>           unsigned long pp_magic;
> >>>>>           struct page_pool *pp;
> >>>>> - struct dmabuf_genpool_chunk_owner *owner;
> >>>>> + struct net_iov_area *owner;
> >>>>
> >>>> Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
> >>>> to net_iov_area to generalize) with the fields that you don't need
> >>>> set to 0/NULL? container_of makes everything harder to follow :-(
> >>>
> >>> It can be that, but then io_uring would have a (null) pointer to
> >>> struct net_devmem_dmabuf_binding it knows nothing about and other
> >>> fields devmem might add in the future. Also, it reduces the
> >>> temptation for the common code to make assumptions about the origin
> >>> of the area / pp memory provider. IOW, I think it's cleaner
> >>> when separated like in this patch.
> >>
> >> Ack, let's see whether other people find any issues with this approach.
> >> For me, it makes the devmem parts harder to read, so my preference
> >> is on dropping this patch and keeping owner=null on your side.
> >
> > I don't mind at this point which approach to take right now. I would
> > prefer keeping dmabuf_genpool_chunk_owner today even if it results in a
> > nullptr in io_uring's case. Once there are more memory providers in the
> > future, I think it'll be clearer what sort of abstraction we might need
> > here.
>
> That's the thing about abstractions, if we say that devmem is the
> only first class citizen for net_iov and everything else by definition
> is 2nd class that should strictly follow devmem TCP patterns, and/or
> that struct dmabuf_genpool_chunk_owner is an integral part of net_iov
> and should be reused by everyone, then preserving the current state
> of the chunk owner is likely the right long term approach. If not, and
> net_iov is actually a generic piece of infrastructure, then IMHO there
> is no place for devmem sticking out of every bit single bit of it, with
> structures that are devmem specific and can even be not defined without
> devmem TCP enabled (fwiw, which is not an actual problem for
> compilation, juts oddness).
>

There is no intention of devmem TCP being a first class citizen or
anything. Abstractly speaking, we're going to draw a line in the sand
and say everything past this line is devmem specific and should be
replaced by other users. In this patch you drew the line between
dmabuf_genpool_chunk_owner and net_iov_area, which is fine by me on
first look. What Stan and I were thinking at first glance is
preserving dmabuf_* (and renaming) and drawing the line somewhere
else, which would have also been fine.

My real issue is whether its safe to do all this container_of while
not always checking explicitly for the type of net_iov. I'm not 100%
sure checking in tcp.c alone is enough, yet. I need to take a deeper
look, no changes requested from me yet.

FWIW I'm out for the next couple of weeks. I'll have time to take a
look during that but not as much as now.
Pavel Begunkov Oct. 11, 2024, 11:12 p.m. UTC | #10
On 10/11/24 23:25, Mina Almasry wrote:
> On Fri, Oct 11, 2024 at 3:02 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>>
>> On 10/11/24 19:44, David Wei wrote:
>>> On 2024-10-09 09:28, Stanislav Fomichev wrote:
>>>> On 10/08, Pavel Begunkov wrote:
>>>>> On 10/8/24 16:46, Stanislav Fomichev wrote:
>>>>>> On 10/07, David Wei wrote:
>>>>>>> From: Pavel Begunkov <asml.silence@gmail.com>
>>>>>>>
>>>>>>> Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner,
>>>>>>> which serves as a useful abstraction to share data and provide a
>>>>>>> context. However, it's too devmem specific, and we want to reuse it for
>>>>>>> other memory providers, and for that we need to decouple net_iov from
>>>>>>> devmem. Make net_iov to point to a new base structure called
>>>>>>> net_iov_area, which dmabuf_genpool_chunk_owner extends.
>>>>>>>
>>>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>>>>> ---
>>>>>>>     include/net/netmem.h | 21 ++++++++++++++++++++-
>>>>>>>     net/core/devmem.c    | 25 +++++++++++++------------
>>>>>>>     net/core/devmem.h    | 25 +++++++++----------------
>>>>>>>     3 files changed, 42 insertions(+), 29 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/net/netmem.h b/include/net/netmem.h
>>>>>>> index 8a6e20be4b9d..3795ded30d2c 100644
>>>>>>> --- a/include/net/netmem.h
>>>>>>> +++ b/include/net/netmem.h
>>>>>>> @@ -24,11 +24,20 @@ struct net_iov {
>>>>>>>            unsigned long __unused_padding;
>>>>>>>            unsigned long pp_magic;
>>>>>>>            struct page_pool *pp;
>>>>>>> - struct dmabuf_genpool_chunk_owner *owner;
>>>>>>> + struct net_iov_area *owner;
>>>>>>
>>>>>> Any reason not to use dmabuf_genpool_chunk_owner as is (or rename it
>>>>>> to net_iov_area to generalize) with the fields that you don't need
>>>>>> set to 0/NULL? container_of makes everything harder to follow :-(
>>>>>
>>>>> It can be that, but then io_uring would have a (null) pointer to
>>>>> struct net_devmem_dmabuf_binding it knows nothing about and other
>>>>> fields devmem might add in the future. Also, it reduces the
>>>>> temptation for the common code to make assumptions about the origin
>>>>> of the area / pp memory provider. IOW, I think it's cleaner
>>>>> when separated like in this patch.
>>>>
>>>> Ack, let's see whether other people find any issues with this approach.
>>>> For me, it makes the devmem parts harder to read, so my preference
>>>> is on dropping this patch and keeping owner=null on your side.
>>>
>>> I don't mind at this point which approach to take right now. I would
>>> prefer keeping dmabuf_genpool_chunk_owner today even if it results in a
>>> nullptr in io_uring's case. Once there are more memory providers in the
>>> future, I think it'll be clearer what sort of abstraction we might need
>>> here.
>>
>> That's the thing about abstractions, if we say that devmem is the
>> only first class citizen for net_iov and everything else by definition
>> is 2nd class that should strictly follow devmem TCP patterns, and/or
>> that struct dmabuf_genpool_chunk_owner is an integral part of net_iov
>> and should be reused by everyone, then preserving the current state
>> of the chunk owner is likely the right long term approach. If not, and
>> net_iov is actually a generic piece of infrastructure, then IMHO there
>> is no place for devmem sticking out of every bit single bit of it, with
>> structures that are devmem specific and can even be not defined without
>> devmem TCP enabled (fwiw, which is not an actual problem for
>> compilation, juts oddness).
>>
> 
> There is no intention of devmem TCP being a first class citizen or
> anything.

Let me note to avoid being misread, that kind of prioritisation can
have place and that's fine, but that usually happens when you build
on top of older code or user base sizes are much different. And
again, theoretically dmabuf_genpool_chunk_owner could be common code,
i.e. if you want to use dmabuf you need to use the structure
regardless of the provider of choice, and it'll do all dmabuf
handling. But the current chunk owner goes beyond that, and
would need some splitting if someone tries to have that kind of
an abstraction.

> Abstractly speaking, we're going to draw a line in the sand
> and say everything past this line is devmem specific and should be
> replaced by other users. In this patch you drew the line between
> dmabuf_genpool_chunk_owner and net_iov_area, which is fine by me on
> first look. What Stan and I were thinking at first glance is
> preserving dmabuf_* (and renaming) and drawing the line somewhere
> else, which would have also been fine.

True enough, I drew the line when it was convenient, io_uring
needs an extendible abstraction that binds net_iovs, and we'll
also have several different sets of net_iovs, so it fell onto
the object holding the net_iov array as the most natural option.
In that sense, we could've had that binding pointing to an
allocated io_zcrx_area, which would then point further into
io_uring, but that's one extra indirection.

As an viable alternative I don't like that much, instead of
trying to share struct net_iov_area, we can just make struct
struct net_iov::owner completely provider dependent and make
it void *, providers will be allowed to store there whatever
they wish.

> My real issue is whether its safe to do all this container_of while
> not always checking explicitly for the type of net_iov. I'm not 100%
> sure checking in tcp.c alone is enough, yet. I need to take a deeper
> look, no changes requested from me yet.

That's done the typical way everything in the kernel and just
inheritance works. When you get into devmem.c the page pool
callbacks, you for sure know that net_iov's passed are devmem's,
nobody should ever take one net_iov's ops and call it with a
second net_iov without care, the page pool follows it.

When someone wants to operate with devmem net_iov's but doesn't
have a callback it has to validate the origin as tcp.c now does
in 5/15. The nice part is that this patch changes types, so all
such places either explicitly listed in this patch, or it has to
pass through one of the devmem.h helpers, which is yet trivially
checkable.

> FWIW I'm out for the next couple of weeks. I'll have time to take a
> look during that but not as much as now.
>
diff mbox series

Patch

diff --git a/include/net/netmem.h b/include/net/netmem.h
index 8a6e20be4b9d..3795ded30d2c 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -24,11 +24,20 @@  struct net_iov {
 	unsigned long __unused_padding;
 	unsigned long pp_magic;
 	struct page_pool *pp;
-	struct dmabuf_genpool_chunk_owner *owner;
+	struct net_iov_area *owner;
 	unsigned long dma_addr;
 	atomic_long_t pp_ref_count;
 };
 
+struct net_iov_area {
+	/* Array of net_iovs for this area. */
+	struct net_iov *niovs;
+	size_t num_niovs;
+
+	/* Offset into the dma-buf where this chunk starts.  */
+	unsigned long base_virtual;
+};
+
 /* These fields in struct page are used by the page_pool and net stack:
  *
  *        struct {
@@ -54,6 +63,16 @@  NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
 NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
 #undef NET_IOV_ASSERT_OFFSET
 
+static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
+{
+	return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+	return niov - net_iov_owner(niov)->niovs;
+}
+
 /* netmem */
 
 /**
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 858982858f81..5c10cf0e2a18 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -32,14 +32,15 @@  static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
 {
 	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
 
-	kvfree(owner->niovs);
+	kvfree(owner->area.niovs);
 	kfree(owner);
 }
 
 static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
 {
-	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+	struct dmabuf_genpool_chunk_owner *owner;
 
+	owner = net_devmem_iov_to_chunk_owner(niov);
 	return owner->base_dma_addr +
 	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
 }
@@ -82,7 +83,7 @@  net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
 
 	offset = dma_addr - owner->base_dma_addr;
 	index = offset / PAGE_SIZE;
-	niov = &owner->niovs[index];
+	niov = &owner->area.niovs[index];
 
 	niov->pp_magic = 0;
 	niov->pp = NULL;
@@ -250,9 +251,9 @@  net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
 			goto err_free_chunks;
 		}
 
-		owner->base_virtual = virtual;
+		owner->area.base_virtual = virtual;
 		owner->base_dma_addr = dma_addr;
-		owner->num_niovs = len / PAGE_SIZE;
+		owner->area.num_niovs = len / PAGE_SIZE;
 		owner->binding = binding;
 
 		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
@@ -264,17 +265,17 @@  net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
 			goto err_free_chunks;
 		}
 
-		owner->niovs = kvmalloc_array(owner->num_niovs,
-					      sizeof(*owner->niovs),
-					      GFP_KERNEL);
-		if (!owner->niovs) {
+		owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
+						   sizeof(*owner->area.niovs),
+						   GFP_KERNEL);
+		if (!owner->area.niovs) {
 			err = -ENOMEM;
 			goto err_free_chunks;
 		}
 
-		for (i = 0; i < owner->num_niovs; i++) {
-			niov = &owner->niovs[i];
-			niov->owner = owner;
+		for (i = 0; i < owner->area.num_niovs; i++) {
+			niov = &owner->area.niovs[i];
+			niov->owner = &owner->area;
 			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
 						      net_devmem_get_dma_addr(niov));
 		}
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 80f38fe46930..12b14377ed3f 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -10,6 +10,8 @@ 
 #ifndef _NET_DEVMEM_H
 #define _NET_DEVMEM_H
 
+#include <net/netmem.h>
+
 struct netlink_ext_ack;
 
 struct net_devmem_dmabuf_binding {
@@ -50,34 +52,25 @@  struct net_devmem_dmabuf_binding {
  * allocations from this chunk.
  */
 struct dmabuf_genpool_chunk_owner {
-	/* Offset into the dma-buf where this chunk starts.  */
-	unsigned long base_virtual;
+	struct net_iov_area area;
+	struct net_devmem_dmabuf_binding *binding;
 
 	/* dma_addr of the start of the chunk.  */
 	dma_addr_t base_dma_addr;
-
-	/* Array of net_iovs for this chunk. */
-	struct net_iov *niovs;
-	size_t num_niovs;
-
-	struct net_devmem_dmabuf_binding *binding;
 };
 
 static inline struct dmabuf_genpool_chunk_owner *
-net_iov_owner(const struct net_iov *niov)
+net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
 {
-	return niov->owner;
-}
+	struct net_iov_area *owner = net_iov_owner(niov);
 
-static inline unsigned int net_iov_idx(const struct net_iov *niov)
-{
-	return niov - net_iov_owner(niov)->niovs;
+	return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
 }
 
 static inline struct net_devmem_dmabuf_binding *
 net_devmem_iov_binding(const struct net_iov *niov)
 {
-	return net_iov_owner(niov)->binding;
+	return net_devmem_iov_to_chunk_owner(niov)->binding;
 }
 
 static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
@@ -87,7 +80,7 @@  static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
 
 static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
 {
-	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+	struct net_iov_area *owner = net_iov_owner(niov);
 
 	return owner->base_virtual +
 	       ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);