diff mbox series

[v2,2/4] brd: extend the rcu regions to cover read and write

Message ID alpine.LRH.2.02.2209201353540.26058@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show
Series brd: implement discard | expand

Commit Message

Mikulas Patocka Sept. 20, 2022, 5:56 p.m. UTC
This patch extends the rcu regions, so that lookup followed by a read or
write of a page is done inside rcu read lock. This will be needed for the
following patch that enables discard.

Note that we also replace "BUG_ON(!page);" with "if (page) ..." in
copy_to_brd - the page may be NULL if write races with discard. In this
situation, the result is undefined, so we can actually skip the write
operation altogether.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/block/brd.c |   59 +++++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel

Comments

Christoph Hellwig Sept. 23, 2022, 3:52 p.m. UTC | #1
On Tue, Sep 20, 2022 at 01:56:25PM -0400, Mikulas Patocka wrote:
> This patch extends the rcu regions, so that lookup followed by a read or
> write of a page is done inside rcu read lock. This will be needed for the
> following patch that enables discard.
> 
> Note that we also replace "BUG_ON(!page);" with "if (page) ..." in
> copy_to_brd - the page may be NULL if write races with discard. In this
> situation, the result is undefined, so we can actually skip the write
> operation at all.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> 
> ---
>  drivers/block/brd.c |   59 +++++++++++++++++++++++-----------------------------
>  1 file changed, 27 insertions(+), 32 deletions(-)
> 
> Index: linux-2.6/drivers/block/brd.c
> ===================================================================
> --- linux-2.6.orig/drivers/block/brd.c
> +++ linux-2.6/drivers/block/brd.c
> @@ -50,31 +50,12 @@ struct brd_device {
>  
>  /*
>   * Look up and return a brd's page for a given sector.
> + * This must be called with the rcu lock held.
>   */
>  static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
>  {
> +	pgoff_t idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
> +	return radix_tree_lookup(&brd->brd_pages, idx);
>  }

This is still missing the rcu_read_lock_held() assertion if you
want to keep it as a separate function.

> +	rcu_read_lock();
> +	page = brd_lookup_page(brd, sector);
> +	if (page) {
> +		dst = kmap_atomic(page);
> +		memcpy(dst + offset, src, copy);
> +		kunmap_atomic(dst);
> +	}
> +	rcu_read_unlock();

How is the null check going to work here?  Simply not copying
data is not exactly the expected result.

This is why I think we need the higher level rework I suggested
last time where we have a helper that always gives you a page
(or maybe an error) by moving the insert so that it also does
the actual final lookup.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

Index: linux-2.6/drivers/block/brd.c
===================================================================
--- linux-2.6.orig/drivers/block/brd.c
+++ linux-2.6/drivers/block/brd.c
@@ -50,31 +50,12 @@  struct brd_device {
 
 /*
  * Look up and return a brd's page for a given sector.
+ * This must be called with the rcu lock held.
  */
 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 {
-	pgoff_t idx;
-	struct page *page;
-
-	/*
-	 * The page lifetime is protected by the fact that we have opened the
-	 * device node -- brd pages will never be deleted under us, so we
-	 * don't need any further locking or refcounting.
-	 *
-	 * This is strictly true for the radix-tree nodes as well (ie. we
-	 * don't actually need the rcu_read_lock()), however that is not a
-	 * documented feature of the radix-tree API so it is better to be
-	 * safe here (we don't have total exclusion from radix tree updates
-	 * here, only deletes).
-	 */
-	rcu_read_lock();
-	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
-	page = radix_tree_lookup(&brd->brd_pages, idx);
-	rcu_read_unlock();
-
-	BUG_ON(page && page->index != idx);
-
-	return page;
+	pgoff_t idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
+	return radix_tree_lookup(&brd->brd_pages, idx);
 }
 
 /*
@@ -88,7 +69,9 @@  static bool brd_insert_page(struct brd_d
 	struct page *page;
 	gfp_t gfp_flags;
 
+	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
+	rcu_read_unlock();
 	if (page)
 		return true;
 
@@ -198,23 +181,29 @@  static void copy_to_brd(struct brd_devic
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
-	page = brd_lookup_page(brd, sector);
-	BUG_ON(!page);
 
-	dst = kmap_atomic(page);
-	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst);
+	rcu_read_lock();
+	page = brd_lookup_page(brd, sector);
+	if (page) {
+		dst = kmap_atomic(page);
+		memcpy(dst + offset, src, copy);
+		kunmap_atomic(dst);
+	}
+	rcu_read_unlock();
 
 	if (copy < n) {
 		src += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
-		page = brd_lookup_page(brd, sector);
-		BUG_ON(!page);
 
-		dst = kmap_atomic(page);
-		memcpy(dst, src, copy);
-		kunmap_atomic(dst);
+		rcu_read_lock();
+		page = brd_lookup_page(brd, sector);
+		if (page) {
+			dst = kmap_atomic(page);
+			memcpy(dst, src, copy);
+			kunmap_atomic(dst);
+		}
+		rcu_read_unlock();
 	}
 }
 
@@ -230,6 +219,8 @@  static void copy_from_brd(void *dst, str
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
+
+	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
 	if (page) {
 		src = kmap_atomic(page);
@@ -237,11 +228,14 @@  static void copy_from_brd(void *dst, str
 		kunmap_atomic(src);
 	} else
 		memset(dst, 0, copy);
+	rcu_read_unlock();
 
 	if (copy < n) {
 		dst += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
+
+		rcu_read_lock();
 		page = brd_lookup_page(brd, sector);
 		if (page) {
 			src = kmap_atomic(page);
@@ -249,6 +243,7 @@  static void copy_from_brd(void *dst, str
 			kunmap_atomic(src);
 		} else
 			memset(dst, 0, copy);
+		rcu_read_unlock();
 	}
 }