diff mbox series

[bpf,2/3] libbpf: restore umem state after socket create failure

Message ID 20210324141337.29269-3-ciara.loftus@intel.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series AF_XDP Socket Creation Fixes | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf
netdev/subject_prefix success Link
netdev/cc_maintainers fail 1 blamed authors not CCed: daniel@iogearbox.net; 9 maintainers not CCed: jonathan.lemon@gmail.com yhs@fb.com kpsingh@kernel.org daniel@iogearbox.net andrii@kernel.org kafai@fb.com ast@kernel.org john.fastabend@gmail.com songliubraving@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes fail Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 59 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Ciara Loftus March 24, 2021, 2:13 p.m. UTC
If the call to socket_create fails, the user may want to retry the
socket creation using the same umem. Ensure that, if the call fails, the
umem is left in the same state on exit by restoring the _save pointers
and not unmapping the set of umem rings if those pointers are non-NULL.

Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices")

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 tools/lib/bpf/xsk.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

Comments

Magnus Karlsson March 26, 2021, 9:06 a.m. UTC | #1
On Wed, Mar 24, 2021 at 3:46 PM Ciara Loftus <ciara.loftus@intel.com> wrote:
>
> If the call to socket_create fails, the user may want to retry the
> socket creation using the same umem. Ensure that the umem is in the
> same state on exit if the call failed by restoring the _save pointers
> and not unmapping the set of umem rings if those pointers are non NULL.
>
> Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices")
>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> ---
>  tools/lib/bpf/xsk.c | 29 ++++++++++++++++++-----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> index 443b0cfb45e8..ec3c23299329 100644
> --- a/tools/lib/bpf/xsk.c
> +++ b/tools/lib/bpf/xsk.c
> @@ -743,21 +743,23 @@ static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
>         return NULL;
>  }
>
> -static void xsk_put_ctx(struct xsk_ctx *ctx)
> +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
>  {
>         struct xsk_umem *umem = ctx->umem;
>         struct xdp_mmap_offsets off;
>         int err;
>
>         if (--ctx->refcount == 0) {
> -               err = xsk_get_mmap_offsets(umem->fd, &off);
> -               if (!err) {
> -                       munmap(ctx->fill->ring - off.fr.desc,
> -                              off.fr.desc + umem->config.fill_size *
> -                              sizeof(__u64));
> -                       munmap(ctx->comp->ring - off.cr.desc,
> -                              off.cr.desc + umem->config.comp_size *
> -                              sizeof(__u64));
> +               if (unmap) {
> +                       err = xsk_get_mmap_offsets(umem->fd, &off);
> +                       if (!err) {
> +                               munmap(ctx->fill->ring - off.fr.desc,
> +                                      off.fr.desc + umem->config.fill_size *
> +                               sizeof(__u64));
> +                               munmap(ctx->comp->ring - off.cr.desc,
> +                                      off.cr.desc + umem->config.comp_size *
> +                               sizeof(__u64));
> +                       }
>                 }

By not unmapping these rings we actually leave more state behind after a
failed socket creation. So how about skipping this logic (and
everything below) and always unmapping the rings on failure as before,
but moving the fill_save = NULL and comp_save = NULL assignments from
xsk_create_ctx to the end of xsk_socket__create_shared, just before the
"return 0", where we know that the whole operation has succeeded? This
way the mappings would be redone during the next xsk_socket__create, and
if someone decides not to retry (for some reason) we do not leave two
mappings behind. That would simplify things. What do you think?

>
>                 list_del(&ctx->list);
> @@ -854,6 +856,9 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
>         struct xsk_socket *xsk;
>         struct xsk_ctx *ctx;
>         int err, ifindex;
> +       struct xsk_ring_prod *fsave = umem->fill_save;
> +       struct xsk_ring_cons *csave = umem->comp_save;
> +       bool unmap = !fsave;
>
>         if (!umem || !xsk_ptr || !(rx || tx))
>                 return -EFAULT;
> @@ -1005,7 +1010,9 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
>                 munmap(rx_map, off.rx.desc +
>                        xsk->config.rx_size * sizeof(struct xdp_desc));
>  out_put_ctx:
> -       xsk_put_ctx(ctx);
> +       umem->fill_save = fsave;
> +       umem->comp_save = csave;
> +       xsk_put_ctx(ctx, unmap);
>  out_socket:
>         if (--umem->refcount)
>                 close(xsk->fd);
> @@ -1071,7 +1078,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
>                 }
>         }
>
> -       xsk_put_ctx(ctx);
> +       xsk_put_ctx(ctx, true);
>
>         umem->refcount--;
>         /* Do not close an fd that also has an associated umem connected
> --
> 2.17.1
>
Ciara Loftus March 26, 2021, 2:56 p.m. UTC | #2
> 
> On Wed, Mar 24, 2021 at 3:46 PM Ciara Loftus <ciara.loftus@intel.com>
> wrote:
> >
> > If the call to socket_create fails, the user may want to retry the
> > socket creation using the same umem. Ensure that the umem is in the
> > same state on exit if the call failed by restoring the _save pointers
> > and not unmapping the set of umem rings if those pointers are non NULL.
> >
> > Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and
> devices")
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > ---
> >  tools/lib/bpf/xsk.c | 29 ++++++++++++++++++-----------
> >  1 file changed, 18 insertions(+), 11 deletions(-)
> >
> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > index 443b0cfb45e8..ec3c23299329 100644
> > --- a/tools/lib/bpf/xsk.c
> > +++ b/tools/lib/bpf/xsk.c
> > @@ -743,21 +743,23 @@ static struct xsk_ctx *xsk_get_ctx(struct
> xsk_umem *umem, int ifindex,
> >         return NULL;
> >  }
> >
> > -static void xsk_put_ctx(struct xsk_ctx *ctx)
> > +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
> >  {
> >         struct xsk_umem *umem = ctx->umem;
> >         struct xdp_mmap_offsets off;
> >         int err;
> >
> >         if (--ctx->refcount == 0) {
> > -               err = xsk_get_mmap_offsets(umem->fd, &off);
> > -               if (!err) {
> > -                       munmap(ctx->fill->ring - off.fr.desc,
> > -                              off.fr.desc + umem->config.fill_size *
> > -                              sizeof(__u64));
> > -                       munmap(ctx->comp->ring - off.cr.desc,
> > -                              off.cr.desc + umem->config.comp_size *
> > -                              sizeof(__u64));
> > +               if (unmap) {
> > +                       err = xsk_get_mmap_offsets(umem->fd, &off);
> > +                       if (!err) {
> > +                               munmap(ctx->fill->ring - off.fr.desc,
> > +                                      off.fr.desc + umem->config.fill_size *
> > +                               sizeof(__u64));
> > +                               munmap(ctx->comp->ring - off.cr.desc,
> > +                                      off.cr.desc + umem->config.comp_size *
> > +                               sizeof(__u64));
> > +                       }
> >                 }
> 
> By not unmapping these rings we actually leave more state after a
> failed socket creation. So how about skipping this logic (and

In the case of the _save rings, the maps existed before the call to
xsk_socket__create. They were created during xsk_umem__create.
So we should preserve these maps in the event of failure.
However, I was using the wrong condition to trigger the unmap in v1.
We should unmap 'fill' only if
        umem->fill_save != fill
I will update this in a v2.

> everything below) and always unmap the rings at failure as before, but
> we move the fill_save = NULL and comp_save = NULL from xsk_create_ctx
> to the end of xsk_socket__create_shared just before the "return 0"
> where we know that the whole operation has succeeded. This way the

I think moving these still makes sense and will add this in the next rev.

Thanks for the feedback and suggestions!

Ciara

> mappings would be redone during the next xsk_socket__create and if
> someone decides not to retry (for some reason) we do not leave two
> mappings behind. Would simplify things. What do you think?

> 
> >
> >                 list_del(&ctx->list);
> > @@ -854,6 +856,9 @@ int xsk_socket__create_shared(struct xsk_socket
> **xsk_ptr,
> >         struct xsk_socket *xsk;
> >         struct xsk_ctx *ctx;
> >         int err, ifindex;
> > +       struct xsk_ring_prod *fsave = umem->fill_save;
> > +       struct xsk_ring_cons *csave = umem->comp_save;
> > +       bool unmap = !fsave;
> >
> >         if (!umem || !xsk_ptr || !(rx || tx))
> >                 return -EFAULT;
> > @@ -1005,7 +1010,9 @@ int xsk_socket__create_shared(struct xsk_socket
> **xsk_ptr,
> >                 munmap(rx_map, off.rx.desc +
> >                        xsk->config.rx_size * sizeof(struct xdp_desc));
> >  out_put_ctx:
> > -       xsk_put_ctx(ctx);
> > +       umem->fill_save = fsave;
> > +       umem->comp_save = csave;
> > +       xsk_put_ctx(ctx, unmap);
> >  out_socket:
> >         if (--umem->refcount)
> >                 close(xsk->fd);
> > @@ -1071,7 +1078,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >                 }
> >         }
> >
> > -       xsk_put_ctx(ctx);
> > +       xsk_put_ctx(ctx, true);
> >
> >         umem->refcount--;
> >         /* Do not close an fd that also has an associated umem connected
> > --
> > 2.17.1
> >
Magnus Karlsson March 26, 2021, 3:20 p.m. UTC | #3
On Fri, Mar 26, 2021 at 3:56 PM Loftus, Ciara <ciara.loftus@intel.com> wrote:
>
> >
> > On Wed, Mar 24, 2021 at 3:46 PM Ciara Loftus <ciara.loftus@intel.com>
> > wrote:
> > >
> > > If the call to socket_create fails, the user may want to retry the
> > > socket creation using the same umem. Ensure that the umem is in the
> > > same state on exit if the call failed by restoring the _save pointers
> > > and not unmapping the set of umem rings if those pointers are non NULL.
> > >
> > > Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and
> > devices")
> > >
> > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > > ---
> > >  tools/lib/bpf/xsk.c | 29 ++++++++++++++++++-----------
> > >  1 file changed, 18 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > index 443b0cfb45e8..ec3c23299329 100644
> > > --- a/tools/lib/bpf/xsk.c
> > > +++ b/tools/lib/bpf/xsk.c
> > > @@ -743,21 +743,23 @@ static struct xsk_ctx *xsk_get_ctx(struct
> > xsk_umem *umem, int ifindex,
> > >         return NULL;
> > >  }
> > >
> > > -static void xsk_put_ctx(struct xsk_ctx *ctx)
> > > +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
> > >  {
> > >         struct xsk_umem *umem = ctx->umem;
> > >         struct xdp_mmap_offsets off;
> > >         int err;
> > >
> > >         if (--ctx->refcount == 0) {
> > > -               err = xsk_get_mmap_offsets(umem->fd, &off);
> > > -               if (!err) {
> > > -                       munmap(ctx->fill->ring - off.fr.desc,
> > > -                              off.fr.desc + umem->config.fill_size *
> > > -                              sizeof(__u64));
> > > -                       munmap(ctx->comp->ring - off.cr.desc,
> > > -                              off.cr.desc + umem->config.comp_size *
> > > -                              sizeof(__u64));
> > > +               if (unmap) {
> > > +                       err = xsk_get_mmap_offsets(umem->fd, &off);
> > > +                       if (!err) {
> > > +                               munmap(ctx->fill->ring - off.fr.desc,
> > > +                                      off.fr.desc + umem->config.fill_size *
> > > +                               sizeof(__u64));
> > > +                               munmap(ctx->comp->ring - off.cr.desc,
> > > +                                      off.cr.desc + umem->config.comp_size *
> > > +                               sizeof(__u64));
> > > +                       }
> > >                 }
> >
> > By not unmapping these rings we actually leave more state after a
> > failed socket creation. So how about skipping this logic (and
>
> In the case of the _save rings, the maps existed before the call to
> xsk_socket__create. They were created during xsk_umem__create.
> So we should preserve these maps in event of failure.
> I was using the wrong condition to trigger the unmap in v1 however.
> We should unmap 'fill' only if
>         umem->fill_save != fill
> I will update this in a v2.

Ahh, you are correct. There are two ways these rings can get allocated,
so that has to be taken care of. Please ignore my comment.

> > everything below) and always unmap the rings at failure as before, but
> > we move the fill_save = NULL and comp_save = NULL from xsk_create_ctx
> > to the end of xsk_socket__create_shared just before the "return 0"
> > where we know that the whole operation has succeeded. This way the
>
> I think moving these still makes sense and will add this in the next rev.
>
> Thanks for the feedback and suggestions!
>
> Ciara
>
> > mappings would be redone during the next xsk_socket__create and if
> > someone decides not to retry (for some reason) we do not leave two
> > mappings behind. Would simplify things. What do you think?
>
> >
> > >
> > >                 list_del(&ctx->list);
> > > @@ -854,6 +856,9 @@ int xsk_socket__create_shared(struct xsk_socket
> > **xsk_ptr,
> > >         struct xsk_socket *xsk;
> > >         struct xsk_ctx *ctx;
> > >         int err, ifindex;
> > > +       struct xsk_ring_prod *fsave = umem->fill_save;
> > > +       struct xsk_ring_cons *csave = umem->comp_save;
> > > +       bool unmap = !fsave;
> > >
> > >         if (!umem || !xsk_ptr || !(rx || tx))
> > >                 return -EFAULT;
> > > @@ -1005,7 +1010,9 @@ int xsk_socket__create_shared(struct xsk_socket
> > **xsk_ptr,
> > >                 munmap(rx_map, off.rx.desc +
> > >                        xsk->config.rx_size * sizeof(struct xdp_desc));
> > >  out_put_ctx:
> > > -       xsk_put_ctx(ctx);
> > > +       umem->fill_save = fsave;
> > > +       umem->comp_save = csave;
> > > +       xsk_put_ctx(ctx, unmap);
> > >  out_socket:
> > >         if (--umem->refcount)
> > >                 close(xsk->fd);
> > > @@ -1071,7 +1078,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> > >                 }
> > >         }
> > >
> > > -       xsk_put_ctx(ctx);
> > > +       xsk_put_ctx(ctx, true);
> > >
> > >         umem->refcount--;
> > >         /* Do not close an fd that also has an associated umem connected
> > > --
> > > 2.17.1
> > >
diff mbox series

Patch

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 443b0cfb45e8..ec3c23299329 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -743,21 +743,23 @@  static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
 	return NULL;
 }
 
-static void xsk_put_ctx(struct xsk_ctx *ctx)
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
 {
 	struct xsk_umem *umem = ctx->umem;
 	struct xdp_mmap_offsets off;
 	int err;
 
 	if (--ctx->refcount == 0) {
-		err = xsk_get_mmap_offsets(umem->fd, &off);
-		if (!err) {
-			munmap(ctx->fill->ring - off.fr.desc,
-			       off.fr.desc + umem->config.fill_size *
-			       sizeof(__u64));
-			munmap(ctx->comp->ring - off.cr.desc,
-			       off.cr.desc + umem->config.comp_size *
-			       sizeof(__u64));
+		if (unmap) {
+			err = xsk_get_mmap_offsets(umem->fd, &off);
+			if (!err) {
+				munmap(ctx->fill->ring - off.fr.desc,
+				       off.fr.desc + umem->config.fill_size *
+				sizeof(__u64));
+				munmap(ctx->comp->ring - off.cr.desc,
+				       off.cr.desc + umem->config.comp_size *
+				sizeof(__u64));
+			}
 		}
 
 		list_del(&ctx->list);
@@ -854,6 +856,9 @@  int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	struct xsk_socket *xsk;
 	struct xsk_ctx *ctx;
 	int err, ifindex;
+	struct xsk_ring_prod *fsave = umem->fill_save;
+	struct xsk_ring_cons *csave = umem->comp_save;
+	bool unmap = !fsave;
 
 	if (!umem || !xsk_ptr || !(rx || tx))
 		return -EFAULT;
@@ -1005,7 +1010,9 @@  int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 		munmap(rx_map, off.rx.desc +
 		       xsk->config.rx_size * sizeof(struct xdp_desc));
 out_put_ctx:
-	xsk_put_ctx(ctx);
+	umem->fill_save = fsave;
+	umem->comp_save = csave;
+	xsk_put_ctx(ctx, unmap);
 out_socket:
 	if (--umem->refcount)
 		close(xsk->fd);
@@ -1071,7 +1078,7 @@  void xsk_socket__delete(struct xsk_socket *xsk)
 		}
 	}
 
-	xsk_put_ctx(ctx);
+	xsk_put_ctx(ctx, true);
 
 	umem->refcount--;
 	/* Do not close an fd that also has an associated umem connected