diff mbox series

[v2,1/2] maple_tree: reset mas->index and mas->last on write retries

Message ID 20240812190543.71967-1-sidhartha.kumar@oracle.com (mailing list archive)
State New
Headers show
Series [v2,1/2] maple_tree: reset mas->index and mas->last on write retries | expand

Commit Message

Sid Kumar Aug. 12, 2024, 7:05 p.m. UTC
The following scenario can result in a race condition:

Consider a node with the following indices and values

	a<------->b<----------->c<--------->d
	    0xA        NULL          0xB

	CPU 1			  CPU 2
      ---------        		---------
	mas_set_range(a,b)
	mas_erase()
		-> range is expanded (a,c) because of null expansion

	mas_nomem()
	mas_unlock()
				mas_store_range(b,c,0xC)

The node now looks like:

	a<------->b<----------->c<--------->d
	    0xA        0xC          0xB

	mas_lock()
	mas_erase() <------ range of erase is still (a,c)

The node is now NULL from (a,c) but the write from CPU 2 should have been
retained and range (b,c) should still have 0xC as its value. We can fix
this by re-intializing to the original index and last. This does not need
a cc: Stable as there are no users of the maple tree which use internal
locking and this condition is only possible with internal locking.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
v1 -> v2:
        - re-initialize index and last in the mas_nomem() if statement so
          fast path is not effected in mas_erase().

        - use __mas_set_range() rather than set mas->index and mas->last
          directly.

 lib/maple_tree.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

Comments

Liam R. Howlett Aug. 13, 2024, 2:12 p.m. UTC | #1
* Sidhartha Kumar <sidhartha.kumar@oracle.com> [240812 15:05]:
> The following scenario can result in a race condition:
> 
> Consider a node with the following indices and values
> 
> 	a<------->b<----------->c<--------->d
> 	    0xA        NULL          0xB
> 
> 	CPU 1			  CPU 2
>       ---------        		---------
> 	mas_set_range(a,b)
> 	mas_erase()
> 		-> range is expanded (a,c) because of null expansion
> 
> 	mas_nomem()
> 	mas_unlock()
> 				mas_store_range(b,c,0xC)
> 
> The node now looks like:
> 
> 	a<------->b<----------->c<--------->d
> 	    0xA        0xC          0xB
> 
> 	mas_lock()
> 	mas_erase() <------ range of erase is still (a,c)
> 
> The node is now NULL from (a,c) but the write from CPU 2 should have been
> retained and range (b,c) should still have 0xC as its value. We can fix
> this by re-intializing to the original index and last. This does not need
> a cc: Stable as there are no users of the maple tree which use internal
> locking and this condition is only possible with internal locking.
> 
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
> v1 -> v2:
>         - re-initialize index and last in the mas_nomem() if statement so
>           fast path is not effected in mas_erase().
> 
>         - use __mas_set_range() rather than set mas->index and mas->last
>           directly.
> 
>  lib/maple_tree.c | 16 ++++++++++++----
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index aa3a5df15b8e..b547ff211ac7 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store);
>   */
>  int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
>  {
> +	unsigned long index = mas->index;
> +	unsigned long last = mas->last;
>  	MA_WR_STATE(wr_mas, mas, entry);
>  
>  	mas_wr_store_setup(&wr_mas);
>  	trace_ma_write(__func__, mas, 0, entry);
>  retry:
>  	mas_wr_store_entry(&wr_mas);
> -	if (unlikely(mas_nomem(mas, gfp)))
> +	if (unlikely(mas_nomem(mas, gfp))) {
> +		if (!entry)
> +			__mas_set_range(mas, index, last);
>  		goto retry;
> +	}
>  
>  	if (unlikely(mas_is_err(mas)))
>  		return xa_err(mas->node);
> @@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
>  void *mas_erase(struct ma_state *mas)
>  {
>  	void *entry;
> +	unsigned long index = mas->index;
>  	MA_WR_STATE(wr_mas, mas, NULL);
>  
>  	if (!mas_is_active(mas) || !mas_is_start(mas))
>  		mas->status = ma_start;
>  
> -	/* Retry unnecessary when holding the write lock. */
> +write_retry:
>  	entry = mas_state_walk(mas);
>  	if (!entry)
>  		return NULL;
>  
> -write_retry:
>  	/* Must reset to ensure spanning writes of last slot are detected */
>  	mas_reset(mas);
>  	mas_wr_store_setup(&wr_mas);
>  	mas_wr_store_entry(&wr_mas);
> -	if (mas_nomem(mas, GFP_KERNEL))
> +	if (mas_nomem(mas, GFP_KERNEL)) {
> +		/* in case the range of entry changed when unlocked */
> +		mas->index = mas->last = index;
>  		goto write_retry;
> +	}
>  
>  	return entry;
>  }
> -- 
> 2.46.0
>
diff mbox series

Patch

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index aa3a5df15b8e..b547ff211ac7 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5451,14 +5451,19 @@  EXPORT_SYMBOL_GPL(mas_store);
  */
 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
 {
+	unsigned long index = mas->index;
+	unsigned long last = mas->last;
 	MA_WR_STATE(wr_mas, mas, entry);
 
 	mas_wr_store_setup(&wr_mas);
 	trace_ma_write(__func__, mas, 0, entry);
 retry:
 	mas_wr_store_entry(&wr_mas);
-	if (unlikely(mas_nomem(mas, gfp)))
+	if (unlikely(mas_nomem(mas, gfp))) {
+		if (!entry)
+			__mas_set_range(mas, index, last);
 		goto retry;
+	}
 
 	if (unlikely(mas_is_err(mas)))
 		return xa_err(mas->node);
@@ -6245,23 +6250,26 @@  EXPORT_SYMBOL_GPL(mas_find_range_rev);
 void *mas_erase(struct ma_state *mas)
 {
 	void *entry;
+	unsigned long index = mas->index;
 	MA_WR_STATE(wr_mas, mas, NULL);
 
 	if (!mas_is_active(mas) || !mas_is_start(mas))
 		mas->status = ma_start;
 
-	/* Retry unnecessary when holding the write lock. */
+write_retry:
 	entry = mas_state_walk(mas);
 	if (!entry)
 		return NULL;
 
-write_retry:
 	/* Must reset to ensure spanning writes of last slot are detected */
 	mas_reset(mas);
 	mas_wr_store_setup(&wr_mas);
 	mas_wr_store_entry(&wr_mas);
-	if (mas_nomem(mas, GFP_KERNEL))
+	if (mas_nomem(mas, GFP_KERNEL)) {
+		/* in case the range of entry changed when unlocked */
+		mas->index = mas->last = index;
 		goto write_retry;
+	}
 
 	return entry;
 }