diff mbox series

[2/4] brd: extend the rcu regions to cover read and write

Message ID alpine.LRH.2.02.2209160459250.543@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show
Series brd: implement discard | expand

Commit Message

Mikulas Patocka Sept. 16, 2022, 8:59 a.m. UTC
This patch extends the rcu regions, so that lookup followed by a read or
write of a page is done inside rcu read lock. This will be needed for the
following patch that enables discard.

Note that we also replace "BUG_ON(!page);" with "if (page) ..." in
copy_to_brd - the page may be NULL if write races with discard. In this
situation, the result is undefined, so we can actually skip the write
operation altogether.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/block/brd.c |   50 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

Comments

Christoph Hellwig Sept. 20, 2022, 7:38 a.m. UTC | #1
>   * Look up and return a brd's page for a given sector.
> + * This must be called with the rcu lock held.

Please add a rcu_read_lock_held() check then.

> -	rcu_read_lock();
>  	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
>  	page = radix_tree_lookup(&brd->brd_pages, idx);
> -	rcu_read_unlock();
> -
> -	BUG_ON(page && page->index != idx);
>  
>  	return page;

No need for the page variable now.  In fact there is no real need
for this helper now, as all the callers really should operate on
the sector or the index anyway.

>  }
> @@ -88,7 +74,9 @@ static bool brd_insert_page(struct brd_d
>  	struct page *page;
>  	gfp_t gfp_flags;
>  
> +	rcu_read_lock();
>  	page = brd_lookup_page(brd, sector);
> +	rcu_read_unlock();
>  	if (page)
>  		return true;

So this looks odd, as we drop the rcu lock without doing anything,
but it actually turns out to be correct as brd_do_bvec does yet
another lookup of it.  So we get an initial look, and optional
insert and then another lookup.  Not very efficient and it might be
worth to fix brd_do_bvec up to avoid these extra lookups given
that you touch it anyway (as would be a radix tree to xarray
conversion).
diff mbox series

Patch

Index: linux-2.6/drivers/block/brd.c
===================================================================
--- linux-2.6.orig/drivers/block/brd.c
+++ linux-2.6/drivers/block/brd.c
@@ -50,29 +50,15 @@  struct brd_device {
 
 /*
  * Look up and return a brd's page for a given sector.
+ * This must be called with the rcu lock held.
  */
 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
 	struct page *page;
 
-	/*
-	 * The page lifetime is protected by the fact that we have opened the
-	 * device node -- brd pages will never be deleted under us, so we
-	 * don't need any further locking or refcounting.
-	 *
-	 * This is strictly true for the radix-tree nodes as well (ie. we
-	 * don't actually need the rcu_read_lock()), however that is not a
-	 * documented feature of the radix-tree API so it is better to be
-	 * safe here (we don't have total exclusion from radix tree updates
-	 * here, only deletes).
-	 */
-	rcu_read_lock();
 	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
 	page = radix_tree_lookup(&brd->brd_pages, idx);
-	rcu_read_unlock();
-
-	BUG_ON(page && page->index != idx);
 
 	return page;
 }
@@ -88,7 +74,9 @@  static bool brd_insert_page(struct brd_d
 	struct page *page;
 	gfp_t gfp_flags;
 
+	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
+	rcu_read_unlock();
 	if (page)
 		return true;
 
@@ -198,23 +186,29 @@  static void copy_to_brd(struct brd_devic
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
-	page = brd_lookup_page(brd, sector);
-	BUG_ON(!page);
 
-	dst = kmap_atomic(page);
-	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst);
+	rcu_read_lock();
+	page = brd_lookup_page(brd, sector);
+	if (page) {
+		dst = kmap_atomic(page);
+		memcpy(dst + offset, src, copy);
+		kunmap_atomic(dst);
+	}
+	rcu_read_unlock();
 
 	if (copy < n) {
 		src += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
-		page = brd_lookup_page(brd, sector);
-		BUG_ON(!page);
 
-		dst = kmap_atomic(page);
-		memcpy(dst, src, copy);
-		kunmap_atomic(dst);
+		rcu_read_lock();
+		page = brd_lookup_page(brd, sector);
+		if (page) {
+			dst = kmap_atomic(page);
+			memcpy(dst, src, copy);
+			kunmap_atomic(dst);
+		}
+		rcu_read_unlock();
 	}
 }
 
@@ -230,6 +224,8 @@  static void copy_from_brd(void *dst, str
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
+
+	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
 	if (page) {
 		src = kmap_atomic(page);
@@ -237,11 +233,14 @@  static void copy_from_brd(void *dst, str
 		kunmap_atomic(src);
 	} else
 		memset(dst, 0, copy);
+	rcu_read_unlock();
 
 	if (copy < n) {
 		dst += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
+
+		rcu_read_lock();
 		page = brd_lookup_page(brd, sector);
 		if (page) {
 			src = kmap_atomic(page);
@@ -249,6 +248,7 @@  static void copy_from_brd(void *dst, str
 			kunmap_atomic(src);
 		} else
 			memset(dst, 0, copy);
+		rcu_read_unlock();
 	}
 }