diff mbox series

[3/4] net-next: frags: add inetpeer frag_mem tracking

Message ID 20220829114648.GA2409@debian (mailing list archive)
State Handled Elsewhere
Headers show
Series net-next: frags: add adaptive per-peer timeout under load | expand

Commit Message

Richard Gobert Aug. 29, 2022, 11:46 a.m. UTC
Track per-peer fragment memory usage, using the existing per-fqdir
memory tracking logic.

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
 include/net/inet_frag.h                 | 11 ++------
 include/net/inetpeer.h                  |  1 +
 net/ieee802154/6lowpan/reassembly.c     |  2 +-
 net/ipv4/inet_fragment.c                | 36 ++++++++++++++++++++-----
 net/ipv4/inetpeer.c                     |  1 +
 net/ipv4/ip_fragment.c                  |  4 +--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/reassembly.c                   |  2 +-
 8 files changed, 38 insertions(+), 21 deletions(-)

Comments

Eric Dumazet Aug. 29, 2022, 10:15 p.m. UTC | #1
On Mon, Aug 29, 2022 at 4:48 AM Richard Gobert <richardbgobert@gmail.com> wrote:
>
> Track per-peer fragment memory usage, using the existing per-fqdir
> memory tracking logic.

This is a rather terse changelog.

We tried to get rid of any dependence over inetpeer, which is not
resistant against DDOS attacks.

So I would not add a new dependency.

Also, tracking memory per peer will not really help in case of bursts ?

>
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> ---
>  include/net/inet_frag.h                 | 11 ++------
>  include/net/inetpeer.h                  |  1 +
>  net/ieee802154/6lowpan/reassembly.c     |  2 +-
>  net/ipv4/inet_fragment.c                | 36 ++++++++++++++++++++-----
>  net/ipv4/inetpeer.c                     |  1 +
>  net/ipv4/ip_fragment.c                  |  4 +--
>  net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
>  net/ipv6/reassembly.c                   |  2 +-
>  8 files changed, 38 insertions(+), 21 deletions(-)
>
> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
> index 05d95fad8a1a..077a0ec78a58 100644
> --- a/include/net/inet_frag.h
> +++ b/include/net/inet_frag.h
> @@ -155,15 +155,8 @@ static inline long frag_mem_limit(const struct fqdir *fqdir)
>         return atomic_long_read(&fqdir->mem);
>  }
>
> -static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
> -{
> -       atomic_long_sub(val, &fqdir->mem);
> -}
> -
> -static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
> -{
> -       atomic_long_add(val, &fqdir->mem);
> -}
> +void sub_frag_mem_limit(struct inet_frag_queue *q, long val);
> +void add_frag_mem_limit(struct inet_frag_queue *q, long val);
>
>  /* RFC 3168 support :
>   * We want to check ECN values of all fragments, do detect invalid combinations.
> diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
> index 74ff688568a0..1c602a706742 100644
> --- a/include/net/inetpeer.h
> +++ b/include/net/inetpeer.h
> @@ -41,6 +41,7 @@ struct inet_peer {
>         u32                     rate_tokens;    /* rate limiting for ICMP */
>         u32                     n_redirects;
>         unsigned long           rate_last;
> +       atomic_long_t           frag_mem;
>         /*
>          * Once inet_peer is queued for deletion (refcnt == 0), following field
>          * is not available: rid
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index a91283d1e5bf..0bf207e94082 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -135,7 +135,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
>                 fq->q.flags |= INET_FRAG_FIRST_IN;
>
>         fq->q.meat += skb->len;
> -       add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> +       add_frag_mem_limit(&fq->q, skb->truesize);
>
>         if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
>             fq->q.meat == fq->q.len) {
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index c3ec1dbe7081..8b8d77d548d4 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -250,6 +250,29 @@ void inet_frag_kill(struct inet_frag_queue *fq)
>  }
>  EXPORT_SYMBOL(inet_frag_kill);
>
> +static inline long peer_mem_limit(const struct inet_frag_queue *q)
> +{
> +       if (!q->peer)
> +               return 0;
> +       return atomic_long_read(&q->peer->frag_mem);
> +}
> +
> +void sub_frag_mem_limit(struct inet_frag_queue *q, long val)
> +{
> +       if (q->peer)
> +               atomic_long_sub(val, &q->peer->frag_mem);
> +       atomic_long_sub(val, &q->fqdir->mem);
> +}
> +EXPORT_SYMBOL(sub_frag_mem_limit);
> +
> +void add_frag_mem_limit(struct inet_frag_queue *q, long val)
> +{
> +       if (q->peer)
> +               atomic_long_add(val, &q->peer->frag_mem);
> +       atomic_long_add(val, &q->fqdir->mem);
> +}
> +EXPORT_SYMBOL(add_frag_mem_limit);
> +
>  static void inet_frag_destroy_rcu(struct rcu_head *head)
>  {
>         struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
> @@ -306,9 +329,8 @@ void inet_frag_destroy(struct inet_frag_queue *q)
>         sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
>         sum = sum_truesize + f->qsize;
>
> +       sub_frag_mem_limit(q, sum);
>         inet_frag_free(q);
> -
> -       sub_frag_mem_limit(fqdir, sum);
>  }
>  EXPORT_SYMBOL(inet_frag_destroy);
>
> @@ -324,7 +346,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
>
>         q->fqdir = fqdir;
>         f->constructor(q, arg);
> -       add_frag_mem_limit(fqdir, f->qsize);
> +       add_frag_mem_limit(q, f->qsize);
>
>         timer_setup(&q->timer, f->frag_expire, 0);
>         spin_lock_init(&q->lock);
> @@ -483,7 +505,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
>
>         delta += head->truesize;
>         if (delta)
> -               add_frag_mem_limit(q->fqdir, delta);
> +               add_frag_mem_limit(q, delta);
>
>         /* If the first fragment is fragmented itself, we split
>          * it to two chunks: the first with data and paged part
> @@ -505,7 +527,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
>                 head->truesize += clone->truesize;
>                 clone->csum = 0;
>                 clone->ip_summed = head->ip_summed;
> -               add_frag_mem_limit(q->fqdir, clone->truesize);
> +               add_frag_mem_limit(q, clone->truesize);
>                 skb_shinfo(head)->frag_list = clone;
>                 nextp = &clone->next;
>         } else {
> @@ -575,7 +597,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
>                         rbn = rbnext;
>                 }
>         }
> -       sub_frag_mem_limit(q->fqdir, sum_truesize);
> +       sub_frag_mem_limit(q, sum_truesize);
>
>         *nextp = NULL;
>         skb_mark_not_on_list(head);
> @@ -604,7 +626,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
>         if (head == q->fragments_tail)
>                 q->fragments_tail = NULL;
>
> -       sub_frag_mem_limit(q->fqdir, head->truesize);
> +       sub_frag_mem_limit(q, head->truesize);
>
>         return head;
>  }
> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
> index e9fed83e9b3c..6e7325dba417 100644
> --- a/net/ipv4/inetpeer.c
> +++ b/net/ipv4/inetpeer.c
> @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
>                         p->dtime = (__u32)jiffies;
>                         refcount_set(&p->refcnt, 2);
>                         atomic_set(&p->rid, 0);
> +                       atomic_long_set(&p->frag_mem, 0);
>                         p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
>                         p->rate_tokens = 0;
>                         p->n_redirects = 0;
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index d0c22c41cf26..e35061f6aadb 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -242,7 +242,7 @@ static int ip_frag_reinit(struct ipq *qp)
>         }
>
>         sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
> -       sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
> +       sub_frag_mem_limit(&qp->q, sum_truesize);
>
>         qp->q.flags = 0;
>         qp->q.len = 0;
> @@ -339,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
>         qp->q.mono_delivery_time = skb->mono_delivery_time;
>         qp->q.meat += skb->len;
>         qp->ecn |= ecn;
> -       add_frag_mem_limit(qp->q.fqdir, skb->truesize);
> +       add_frag_mem_limit(&qp->q, skb->truesize);
>         if (offset == 0)
>                 qp->q.flags |= INET_FRAG_FIRST_IN;
>
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index 7dd3629dd19e..11ce2335c584 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -269,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
>         fq->ecn |= ecn;
>         if (payload_len > fq->q.max_size)
>                 fq->q.max_size = payload_len;
> -       add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> +       add_frag_mem_limit(&fq->q, skb->truesize);
>
>         /* The first fragment.
>          * nhoffset is obtained from the first fragment, of course.
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index ff866f2a879e..cd4ba6cc956b 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -197,7 +197,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
>         fq->q.mono_delivery_time = skb->mono_delivery_time;
>         fq->q.meat += skb->len;
>         fq->ecn |= ecn;
> -       add_frag_mem_limit(fq->q.fqdir, skb->truesize);
> +       add_frag_mem_limit(&fq->q, skb->truesize);
>
>         fragsize = -skb_network_offset(skb) + skb->len;
>         if (fragsize > fq->q.max_size)
> --
> 2.36.1
>
Richard Gobert Sept. 1, 2022, 3:01 p.m. UTC | #2
On Mon, Aug 29, 2022 at 03:15:47PM -0700, Eric Dumazet wrote:
> We tried to get rid of any dependence over inetpeer, which is not
> resistant against DDOS attacks.
> 
> So I would not add a new dependency.

I see your point. What do you suggest doing differently?

The inetpeer mechanism is used for IPv4 frags. If it isn't resistant
against DDoS attacks, can it perhaps be improved?
Eric Dumazet Sept. 1, 2022, 4:06 p.m. UTC | #3
On Thu, Sep 1, 2022 at 8:03 AM Richard Gobert <richardbgobert@gmail.com> wrote:
>
> On Mon, Aug 29, 2022 at 03:15:47PM -0700, Eric Dumazet wrote:
> > We tried to get rid of any dependence over inetpeer, which is not
> > resistant against DDOS attacks.
> >
> > So I would not add a new dependency.
>
> I see your point. What do you suggest doing differently?
>
> The inetpeer mechanism is used for IPv4 frags. If it isn't resistant
> against DDoS attacks, can it perhaps be improved?

It can be disabled if needed, by changing ipfrag_max_dist sysctl.

Quite frankly IPv4 reassembly unit is a toy, I am always surprised
some applications are still relying on IP fragments.
Richard Gobert Sept. 11, 2022, 11:54 a.m. UTC | #4
On Thu, Sep 01, 2022 at 09:06:59AM -0700, Eric Dumazet wrote:
> It can be disabled if needed, by changing ipfrag_max_dist sysctl.

I understand your reluctance to add another dependency on inetpeer.

> Quite frankly IPv4 reassembly unit is a toy, I am always surprised
> some applications are still relying on IP fragments.

Do you think there's any room for improvement in IP fragments? I
believe that it is possible to make frags less fragile and prone
to overload in real-world scenarios.
diff mbox series

Patch

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 05d95fad8a1a..077a0ec78a58 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -155,15 +155,8 @@  static inline long frag_mem_limit(const struct fqdir *fqdir)
 	return atomic_long_read(&fqdir->mem);
 }
 
-static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
-{
-	atomic_long_sub(val, &fqdir->mem);
-}
-
-static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
-{
-	atomic_long_add(val, &fqdir->mem);
-}
+void sub_frag_mem_limit(struct inet_frag_queue *q, long val);
+void add_frag_mem_limit(struct inet_frag_queue *q, long val);
 
 /* RFC 3168 support :
  * We want to check ECN values of all fragments, do detect invalid combinations.
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 74ff688568a0..1c602a706742 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@  struct inet_peer {
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	u32			n_redirects;
 	unsigned long		rate_last;
+	atomic_long_t		frag_mem;
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == 0), following field
 	 * is not available: rid
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index a91283d1e5bf..0bf207e94082 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -135,7 +135,7 @@  static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
 		fq->q.flags |= INET_FRAG_FIRST_IN;
 
 	fq->q.meat += skb->len;
-	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c3ec1dbe7081..8b8d77d548d4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -250,6 +250,29 @@  void inet_frag_kill(struct inet_frag_queue *fq)
 }
 EXPORT_SYMBOL(inet_frag_kill);
 
+static inline long peer_mem_limit(const struct inet_frag_queue *q)
+{
+	if (!q->peer)
+		return 0;
+	return atomic_long_read(&q->peer->frag_mem);
+}
+
+void sub_frag_mem_limit(struct inet_frag_queue *q, long val)
+{
+	if (q->peer)
+		atomic_long_sub(val, &q->peer->frag_mem);
+	atomic_long_sub(val, &q->fqdir->mem);
+}
+EXPORT_SYMBOL(sub_frag_mem_limit);
+
+void add_frag_mem_limit(struct inet_frag_queue *q, long val)
+{
+	if (q->peer)
+		atomic_long_add(val, &q->peer->frag_mem);
+	atomic_long_add(val, &q->fqdir->mem);
+}
+EXPORT_SYMBOL(add_frag_mem_limit);
+
 static void inet_frag_destroy_rcu(struct rcu_head *head)
 {
 	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
@@ -306,9 +329,8 @@  void inet_frag_destroy(struct inet_frag_queue *q)
 	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
 	sum = sum_truesize + f->qsize;
 
+	sub_frag_mem_limit(q, sum);
 	inet_frag_free(q);
-
-	sub_frag_mem_limit(fqdir, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
@@ -324,7 +346,7 @@  static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 
 	q->fqdir = fqdir;
 	f->constructor(q, arg);
-	add_frag_mem_limit(fqdir, f->qsize);
+	add_frag_mem_limit(q, f->qsize);
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
@@ -483,7 +505,7 @@  void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 
 	delta += head->truesize;
 	if (delta)
-		add_frag_mem_limit(q->fqdir, delta);
+		add_frag_mem_limit(q, delta);
 
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
@@ -505,7 +527,7 @@  void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 		head->truesize += clone->truesize;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(q->fqdir, clone->truesize);
+		add_frag_mem_limit(q, clone->truesize);
 		skb_shinfo(head)->frag_list = clone;
 		nextp = &clone->next;
 	} else {
@@ -575,7 +597,7 @@  void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
 			rbn = rbnext;
 		}
 	}
-	sub_frag_mem_limit(q->fqdir, sum_truesize);
+	sub_frag_mem_limit(q, sum_truesize);
 
 	*nextp = NULL;
 	skb_mark_not_on_list(head);
@@ -604,7 +626,7 @@  struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
 	if (head == q->fragments_tail)
 		q->fragments_tail = NULL;
 
-	sub_frag_mem_limit(q->fqdir, head->truesize);
+	sub_frag_mem_limit(q, head->truesize);
 
 	return head;
 }
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e9fed83e9b3c..6e7325dba417 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -216,6 +216,7 @@  struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			p->dtime = (__u32)jiffies;
 			refcount_set(&p->refcnt, 2);
 			atomic_set(&p->rid, 0);
+			atomic_long_set(&p->frag_mem, 0);
 			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 			p->rate_tokens = 0;
 			p->n_redirects = 0;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d0c22c41cf26..e35061f6aadb 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -242,7 +242,7 @@  static int ip_frag_reinit(struct ipq *qp)
 	}
 
 	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
-	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
+	sub_frag_mem_limit(&qp->q, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
@@ -339,7 +339,7 @@  static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	qp->q.mono_delivery_time = skb->mono_delivery_time;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
-	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
+	add_frag_mem_limit(&qp->q, skb->truesize);
 	if (offset == 0)
 		qp->q.flags |= INET_FRAG_FIRST_IN;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 7dd3629dd19e..11ce2335c584 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -269,7 +269,7 @@  static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ff866f2a879e..cd4ba6cc956b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -197,7 +197,7 @@  static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	fq->q.mono_delivery_time = skb->mono_delivery_time;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
-	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	fragsize = -skb_network_offset(skb) + skb->len;
 	if (fragsize > fq->q.max_size)