diff mbox series

[BlueZ,5/6] mesh: Keep canceled SAR data for at least 10 sec

Message ID 20221006145927.32731-6-isak.westin@loytec.com (mailing list archive)
State New, archived
Headers show
Series Mesh: Fixes for PTS issues | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/checkpatch success Checkpatch PASS
tedd_an/gitlint success Gitlint PASS

Commit Message

Isak Westin Oct. 6, 2022, 2:59 p.m. UTC
When a SAR transmission has been completed or canceled, the recipent
should store the block authentication values for at least 10 seconds
and ignore new segments with the same values during this period. See
MshPRFv1.0.1 section 3.5.3.4.
---
 mesh/net.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

Comments

Brian Gix Oct. 6, 2022, 8:55 p.m. UTC | #1
Hi Isak,

Have you run this patch through any runtime analysis (like valgrind
etc) to ensure it has introduced no memory leaks?

I am particularily concerned with any l_timeout_remove() calls that
don't immediately set the freed timer pointer to NULL, and any new
l_timeout_create() calls that are not preceded with a check that
there is not already a valid pointer occupying the seg_timeout or
msg_timeout members. 

On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote:
> When a SAR transmission has been completed or canceled, the recipent
> should store the block authentication values for at least 10 seconds
> and ignore new segments with the same values during this period. See
> MshPRFv1.0.1 section 3.5.3.4.
> ---
>  mesh/net.c | 30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/mesh/net.c b/mesh/net.c
> index 379a6e250..e95ae5114 100644
> --- a/mesh/net.c
> +++ b/mesh/net.c
> @@ -46,6 +46,7 @@
>  
>  #define SEG_TO 2
>  #define MSG_TO 60
> +#define SAR_DEL        10
>  
>  #define DEFAULT_TRANSMIT_COUNT         1
>  #define DEFAULT_TRANSMIT_INTERVAL      100
> @@ -166,6 +167,7 @@ struct mesh_sar {
>         bool segmented;
>         bool frnd;
>         bool frnd_cred;
> +       bool delete;
>         uint8_t ttl;
>         uint8_t last_seg;
>         uint8_t key_aid;
> @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout
> *seg_timeout, void *user_data)
>  static void inmsg_to(struct l_timeout *msg_timeout, void *user_data)
>  {
>         struct mesh_net *net = user_data;
> -       struct mesh_sar *sar = l_queue_remove_if(net->sar_in,
> +       struct mesh_sar *sar = l_queue_find(net->sar_in,
>                         match_msg_timeout, msg_timeout);
>  
>         l_timeout_remove(msg_timeout);
>         if (!sar)
>                 return;
>  
> +       if (!sar->delete) {
> +               /*
> +                * Incomplete timer expired, cancel SAR and start
> +                * delete timer
> +                */
> +               sar->delete = true;
> +               l_timeout_remove(sar->seg_timeout);
> +               sar->seg_timeout = NULL;
> +               sar->msg_timeout = l_timeout_create(SAR_DEL,
> +                                       inmsg_to, net, NULL);
> +               return;
> +       }
> +
> +       l_queue_remove(net->sar_in, sar);
>         sar->msg_timeout = NULL;
>         mesh_sar_free(sar);
>  }
> @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, bool
> frnd, uint32_t iv_index,
>                         /* Re-Send ACK for full msg */
>                         send_net_ack(net, sar_in, expected);
>                         return true;
> -               }
> +               } else if (sar_in->delete)
> +                       /* Ignore canceled */
> +                       return false;
>         } else {
>                 uint16_t len = MAX_SEG_TO_LEN(segN);
>  
> @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, bool
> frnd, uint32_t iv_index,
>                 sar_in->len = segN * MAX_SEG_LEN + size;
>  
>         if (sar_in->flags == expected) {
> +               /* Remove message incomplete timer */
> +               l_timeout_remove(sar_in->msg_timeout);
> +
>                 /* Got it all */
>                 send_net_ack(net, sar_in, expected);
>  
> @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net,
> bool frnd, uint32_t iv_index,
>                 /* Kill Inter-Seg timeout */
>                 l_timeout_remove(sar_in->seg_timeout);
>                 sar_in->seg_timeout = NULL;
> +
> +               /* Start delete timer */
> +               sar_in->delete = true;
> +               sar_in->msg_timeout = l_timeout_create(SAR_DEL,
> +                               inmsg_to, net, NULL);
>                 return true;
>         }
>
Isak Westin Oct. 7, 2022, 1:33 p.m. UTC | #2
Hi Brian,

> Hi Isak,
>
> Have you run this patch through any runtime analysis (like valgrind
> etc) to ensure it has introduced no memory leaks?
>
> I am particularily concerned with any l_timeout_remove() calls that don't immediately set the freed timer pointer to NULL, and any new
> l_timeout_create() calls that are not preceded with a check that there is not already a valid pointer occupying the seg_timeout or msg_timeout members.
>

I tested it with valgrind and found no memory leaks. However, it is perhaps anyway a better practice to use l_timeout_modify() ?
If so, I will update the patch.

> On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote:
> > When a SAR transmission has been completed or canceled, the recipent
> > should store the block authentication values for at least 10 seconds
> > and ignore new segments with the same values during this period. See
> > MshPRFv1.0.1 section 3.5.3.4.
> > ---
> >  mesh/net.c | 30 ++++++++++++++++++++++++++++--
> >  1 file changed, 28 insertions(+), 2 deletions(-)
> >
> > diff --git a/mesh/net.c b/mesh/net.c
> > index 379a6e250..e95ae5114 100644
> > --- a/mesh/net.c
> > +++ b/mesh/net.c
> > @@ -46,6 +46,7 @@
> >
> >  #define SEG_TO 2
> >  #define MSG_TO 60
> > +#define SAR_DEL        10
> >
> >  #define DEFAULT_TRANSMIT_COUNT         1
> >  #define DEFAULT_TRANSMIT_INTERVAL      100
> > @@ -166,6 +167,7 @@ struct mesh_sar {
> >         bool segmented;
> >         bool frnd;
> >         bool frnd_cred;
> > +       bool delete;
> >         uint8_t ttl;
> >         uint8_t last_seg;
> >         uint8_t key_aid;
> > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout
> > *seg_timeout, void *user_data)  static void inmsg_to(struct l_timeout
> > *msg_timeout, void *user_data)  {
> >         struct mesh_net *net = user_data;
> > -       struct mesh_sar *sar = l_queue_remove_if(net->sar_in,
> > +       struct mesh_sar *sar = l_queue_find(net->sar_in,
> >                         match_msg_timeout, msg_timeout);
> >
> >         l_timeout_remove(msg_timeout);
> >         if (!sar)
> >                 return;
> >
> > +       if (!sar->delete) {
> > +               /*
> > +                * Incomplete timer expired, cancel SAR and start
> > +                * delete timer
> > +                */
> > +               sar->delete = true;
> > +               l_timeout_remove(sar->seg_timeout);
> > +               sar->seg_timeout = NULL;
> > +               sar->msg_timeout = l_timeout_create(SAR_DEL,
> > +                                       inmsg_to, net, NULL);
> > +               return;
> > +       }
> > +
> > +       l_queue_remove(net->sar_in, sar);
> >         sar->msg_timeout = NULL;
> >         mesh_sar_free(sar);
> >  }
> > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, bool
> > frnd, uint32_t iv_index,
> >                         /* Re-Send ACK for full msg */
> >                         send_net_ack(net, sar_in, expected);
> >                         return true;
> > -               }
> > +               } else if (sar_in->delete)
> > +                       /* Ignore canceled */
> > +                       return false;
> >         } else {
> >                 uint16_t len = MAX_SEG_TO_LEN(segN);
> >
> > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, bool
> > frnd, uint32_t iv_index,
> >                 sar_in->len = segN * MAX_SEG_LEN + size;
> >
> >         if (sar_in->flags == expected) {
> > +               /* Remove message incomplete timer */
> > +               l_timeout_remove(sar_in->msg_timeout);
> > +
> >                 /* Got it all */
> >                 send_net_ack(net, sar_in, expected);
> >
> > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net, bool
> > frnd, uint32_t iv_index,
> >                 /* Kill Inter-Seg timeout */
> >                 l_timeout_remove(sar_in->seg_timeout);
> >                 sar_in->seg_timeout = NULL;
> > +
> > +               /* Start delete timer */
> > +               sar_in->delete = true;
> > +               sar_in->msg_timeout = l_timeout_create(SAR_DEL,
> > +                               inmsg_to, net, NULL);
> >                 return true;
> >         }
> >
>
>
Brian Gix Oct. 7, 2022, 2:56 p.m. UTC | #3
Hi Isak,

On Fri, 2022-10-07 at 13:33 +0000, Isak Westin wrote:
> Hi Brian,
> 
> > Hi Isak,
> > 
> > Have you run this patch through any runtime analysis (like valgrind
> > etc) to ensure it has introduced no memory leaks?
> > 
> > I am particularily concerned with any l_timeout_remove() calls that
> > don't immediately set the freed timer pointer to NULL, and any new
> > l_timeout_create() calls that are not preceded with a check that
> > there is not already a valid pointer occupying the seg_timeout or
> > msg_timeout members.
> > 
> 
> I tested it with valgrind and found no memory leaks. However, it is
> perhaps anyway a better practice to use l_timeout_modify() ?
> If so, I will update the patch.

What's important to remember about the l_timeout* functions is that
they do not clean-up after themselves, you must remove them when you
are done, and you need to be careful to not double-free.

You can free and then create if you are more comfortable with that then
l_timeout_modify, but follow the rule of thumb to set the pointers to
NULL after you have freed the timer, and make sure you free the timers
before creating a new one. And free the timers that have fired which
you do not intend to restart. Many of the timers in the SAR system 
never trigger if the messages are flowing as they should, and so some
potential memory leaks don't get caught by valgrind *unless* an
"abnormal" timer fire occurs.  We've had to address a lot of those.

> 
> > On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote:
> > > When a SAR transmission has been completed or canceled, the
> > > recipent
> > > should store the block authentication values for at least 10
> > > seconds
> > > and ignore new segments with the same values during this period.
> > > See
> > > MshPRFv1.0.1 section 3.5.3.4.
> > > ---
> > >  mesh/net.c | 30 ++++++++++++++++++++++++++++--
> > >  1 file changed, 28 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/mesh/net.c b/mesh/net.c
> > > index 379a6e250..e95ae5114 100644
> > > --- a/mesh/net.c
> > > +++ b/mesh/net.c
> > > @@ -46,6 +46,7 @@
> > > 
> > >  #define SEG_TO 2
> > >  #define MSG_TO 60
> > > +#define SAR_DEL        10
> > > 
> > >  #define DEFAULT_TRANSMIT_COUNT         1
> > >  #define DEFAULT_TRANSMIT_INTERVAL      100
> > > @@ -166,6 +167,7 @@ struct mesh_sar {
> > >         bool segmented;
> > >         bool frnd;
> > >         bool frnd_cred;
> > > +       bool delete;
> > >         uint8_t ttl;
> > >         uint8_t last_seg;
> > >         uint8_t key_aid;
> > > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout
> > > *seg_timeout, void *user_data)  static void inmsg_to(struct
> > > l_timeout
> > > *msg_timeout, void *user_data)  {
> > >         struct mesh_net *net = user_data;
> > > -       struct mesh_sar *sar = l_queue_remove_if(net->sar_in,
> > > +       struct mesh_sar *sar = l_queue_find(net->sar_in,
> > >                         match_msg_timeout, msg_timeout);
> > > 
> > >         l_timeout_remove(msg_timeout);
> > >         if (!sar)
> > >                 return;
> > > 
> > > +       if (!sar->delete) {
> > > +               /*
> > > +                * Incomplete timer expired, cancel SAR and start
> > > +                * delete timer
> > > +                */
> > > +               sar->delete = true;
> > > +               l_timeout_remove(sar->seg_timeout);
> > > +               sar->seg_timeout = NULL;
> > > +               sar->msg_timeout = l_timeout_create(SAR_DEL,
> > > +                                       inmsg_to, net, NULL);
> > > +               return;
> > > +       }
> > > +
> > > +       l_queue_remove(net->sar_in, sar);
> > >         sar->msg_timeout = NULL;
> > >         mesh_sar_free(sar);
> > >  }
> > > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net,
> > > bool
> > > frnd, uint32_t iv_index,
> > >                         /* Re-Send ACK for full msg */
> > >                         send_net_ack(net, sar_in, expected);
> > >                         return true;
> > > -               }
> > > +               } else if (sar_in->delete)
> > > +                       /* Ignore canceled */
> > > +                       return false;
> > >         } else {
> > >                 uint16_t len = MAX_SEG_TO_LEN(segN);
> > > 
> > > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net,
> > > bool
> > > frnd, uint32_t iv_index,
> > >                 sar_in->len = segN * MAX_SEG_LEN + size;
> > > 
> > >         if (sar_in->flags == expected) {
> > > +               /* Remove message incomplete timer */
> > > +               l_timeout_remove(sar_in->msg_timeout);
> > > +
> > >                 /* Got it all */
> > >                 send_net_ack(net, sar_in, expected);
> > > 
> > > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net,
> > > bool
> > > frnd, uint32_t iv_index,
> > >                 /* Kill Inter-Seg timeout */
> > >                 l_timeout_remove(sar_in->seg_timeout);
> > >                 sar_in->seg_timeout = NULL;
> > > +
> > > +               /* Start delete timer */
> > > +               sar_in->delete = true;
> > > +               sar_in->msg_timeout = l_timeout_create(SAR_DEL,
> > > +                               inmsg_to, net, NULL);
> > >                 return true;
> > >         }
> > > 
> > 
> >
Isak Westin Oct. 11, 2022, 2:13 p.m. UTC | #4
Hi Brian,

> Hi Isak,
>
> On Fri, 2022-10-07 at 13:33 +0000, Isak Westin wrote:
> > Hi Brian,
> >
> > > Hi Isak,
> > >
> > > Have you run this patch through any runtime analysis (like valgrind
> > > etc) to ensure it has introduced no memory leaks?
> > >
> > > I am particularily concerned with any l_timeout_remove() calls that
> > > don't immediately set the freed timer pointer to NULL, and any new
> > > l_timeout_create() calls that are not preceded with a check that
> > > there is not already a valid pointer occupying the seg_timeout or
> > > msg_timeout members.
> > >
> >
> > I tested it with valgrind and found no memory leaks. However, it is
> > perhaps anyway a better practice to use l_timeout_modify() ?
> > If so, I will update the patch.
>
> What's important to remember about the l_timeout* functions is
> that they do not clean-up after themselves, you must remove them
> when you are done, and you need to be careful to not double-free.
>
> You can free and then create if you are more comfortable with that
> then l_timeout_modify, but follow the rule of thumb to set the
> pointers to NULL after you have freed the timer, and make sure
> you free the timers before creating a new one. And free the timers
> that have fired which you do not intend to restart. Many of the
> timers in the SAR system never trigger if the messages are flowing
> as they should, and so some potential memory leaks don't get
> caught by valgrind *unless* an "abnormal" timer fire occurs.
> We've had to address a lot of those.
>

I submitted a v3 (fixed some spelling from v2) patch now where I changed to use l_timeout_modify instead. That makes more sense imo, since the timeout is in fact being reused.
I tested this patch with valgrind in PTS but also in "normal" operation with a running application. If there is something additional you would wish me to test, please let me know.

> >
> > > On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote:
> > > > When a SAR transmission has been completed or canceled, the
> > > > recipent should store the block authentication values for at least
> > > > 10 seconds and ignore new segments with the same values during
> > > > this period.
> > > > See
> > > > MshPRFv1.0.1 section 3.5.3.4.
> > > > ---
> > > >  mesh/net.c | 30 ++++++++++++++++++++++++++++--
> > > >  1 file changed, 28 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/mesh/net.c b/mesh/net.c index 379a6e250..e95ae5114
> > > > 100644
> > > > --- a/mesh/net.c
> > > > +++ b/mesh/net.c
> > > > @@ -46,6 +46,7 @@
> > > >
> > > >  #define SEG_TO 2
> > > >  #define MSG_TO 60
> > > > +#define SAR_DEL        10
> > > >
> > > >  #define DEFAULT_TRANSMIT_COUNT         1
> > > >  #define DEFAULT_TRANSMIT_INTERVAL      100
> > > > @@ -166,6 +167,7 @@ struct mesh_sar {
> > > >         bool segmented;
> > > >         bool frnd;
> > > >         bool frnd_cred;
> > > > +       bool delete;
> > > >         uint8_t ttl;
> > > >         uint8_t last_seg;
> > > >         uint8_t key_aid;
> > > > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout
> > > > *seg_timeout, void *user_data)  static void inmsg_to(struct
> > > > l_timeout *msg_timeout, void *user_data)  {
> > > >         struct mesh_net *net = user_data;
> > > > -       struct mesh_sar *sar = l_queue_remove_if(net->sar_in,
> > > > +       struct mesh_sar *sar = l_queue_find(net->sar_in,
> > > >                         match_msg_timeout, msg_timeout);
> > > >
> > > >         l_timeout_remove(msg_timeout);
> > > >         if (!sar)
> > > >                 return;
> > > >
> > > > +       if (!sar->delete) {
> > > > +               /*
> > > > +                * Incomplete timer expired, cancel SAR and start
> > > > +                * delete timer
> > > > +                */
> > > > +               sar->delete = true;
> > > > +               l_timeout_remove(sar->seg_timeout);
> > > > +               sar->seg_timeout = NULL;
> > > > +               sar->msg_timeout = l_timeout_create(SAR_DEL,
> > > > +                                       inmsg_to, net, NULL);
> > > > +               return;
> > > > +       }
> > > > +
> > > > +       l_queue_remove(net->sar_in, sar);
> > > >         sar->msg_timeout = NULL;
> > > >         mesh_sar_free(sar);
> > > >  }
> > > > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net,
> > > > bool frnd, uint32_t iv_index,
> > > >                         /* Re-Send ACK for full msg */
> > > >                         send_net_ack(net, sar_in, expected);
> > > >                         return true;
> > > > -               }
> > > > +               } else if (sar_in->delete)
> > > > +                       /* Ignore canceled */
> > > > +                       return false;
> > > >         } else {
> > > >                 uint16_t len = MAX_SEG_TO_LEN(segN);
> > > >
> > > > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net,
> > > > bool frnd, uint32_t iv_index,
> > > >                 sar_in->len = segN * MAX_SEG_LEN + size;
> > > >
> > > >         if (sar_in->flags == expected) {
> > > > +               /* Remove message incomplete timer */
> > > > +               l_timeout_remove(sar_in->msg_timeout);
> > > > +
> > > >                 /* Got it all */
> > > >                 send_net_ack(net, sar_in, expected);
> > > >
> > > > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net,
> > > > bool frnd, uint32_t iv_index,
> > > >                 /* Kill Inter-Seg timeout */
> > > >                 l_timeout_remove(sar_in->seg_timeout);
> > > >                 sar_in->seg_timeout = NULL;
> > > > +
> > > > +               /* Start delete timer */
> > > > +               sar_in->delete = true;
> > > > +               sar_in->msg_timeout = l_timeout_create(SAR_DEL,
> > > > +                               inmsg_to, net, NULL);
> > > >                 return true;
> > > >         }
> > > >
> > >
> > >
>
>
diff mbox series

Patch

diff --git a/mesh/net.c b/mesh/net.c
index 379a6e250..e95ae5114 100644
--- a/mesh/net.c
+++ b/mesh/net.c
@@ -46,6 +46,7 @@ 
 
 #define SEG_TO	2
 #define MSG_TO	60
+#define SAR_DEL	10
 
 #define DEFAULT_TRANSMIT_COUNT		1
 #define DEFAULT_TRANSMIT_INTERVAL	100
@@ -166,6 +167,7 @@  struct mesh_sar {
 	bool segmented;
 	bool frnd;
 	bool frnd_cred;
+	bool delete;
 	uint8_t ttl;
 	uint8_t last_seg;
 	uint8_t key_aid;
@@ -1493,13 +1495,27 @@  static void inseg_to(struct l_timeout *seg_timeout, void *user_data)
 static void inmsg_to(struct l_timeout *msg_timeout, void *user_data)
 {
 	struct mesh_net *net = user_data;
-	struct mesh_sar *sar = l_queue_remove_if(net->sar_in,
+	struct mesh_sar *sar = l_queue_find(net->sar_in,
 			match_msg_timeout, msg_timeout);
 
 	l_timeout_remove(msg_timeout);
 	if (!sar)
 		return;
 
+	if (!sar->delete) {
+		/*
+		 * Incomplete timer expired, cancel SAR and start
+		 * delete timer
+		 */
+		sar->delete = true;
+		l_timeout_remove(sar->seg_timeout);
+		sar->seg_timeout = NULL;
+		sar->msg_timeout = l_timeout_create(SAR_DEL,
+					inmsg_to, net, NULL);
+		return;
+	}
+
+	l_queue_remove(net->sar_in, sar);
 	sar->msg_timeout = NULL;
 	mesh_sar_free(sar);
 }
@@ -1956,7 +1972,9 @@  static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index,
 			/* Re-Send ACK for full msg */
 			send_net_ack(net, sar_in, expected);
 			return true;
-		}
+		} else if (sar_in->delete)
+			/* Ignore canceled */
+			return false;
 	} else {
 		uint16_t len = MAX_SEG_TO_LEN(segN);
 
@@ -1996,6 +2014,9 @@  static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index,
 		sar_in->len = segN * MAX_SEG_LEN + size;
 
 	if (sar_in->flags == expected) {
+		/* Remove message incomplete timer */
+		l_timeout_remove(sar_in->msg_timeout);
+
 		/* Got it all */
 		send_net_ack(net, sar_in, expected);
 
@@ -2006,6 +2027,11 @@  static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index,
 		/* Kill Inter-Seg timeout */
 		l_timeout_remove(sar_in->seg_timeout);
 		sar_in->seg_timeout = NULL;
+
+		/* Start delete timer */
+		sar_in->delete = true;
+		sar_in->msg_timeout = l_timeout_create(SAR_DEL,
+				inmsg_to, net, NULL);
 		return true;
 	}