diff mbox

[14/14] barriers

Message ID Pine.LNX.4.64.0902231422490.23293@hs20-bc2-1.build.redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Alasdair Kergon
Headers show

Commit Message

Mikulas Patocka Feb. 23, 2009, 7:23 p.m. UTC
Barrier support.

Barriers are submitted to a worker thread that issues them in-order.

__process_bio functions is modified that when it sees a barrier request,
it waits for all pending IO before the request, then submits the barrier
and waits for it.

DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
barriers.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm.c |   75 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 14 deletions(-)


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

Nikanth K Feb. 24, 2009, 10:11 a.m. UTC | #1
On Tue, Feb 24, 2009 at 12:53 AM, Mikulas Patocka <mpatocka@redhat.com> wrote:

<snip>

>
> +static int dm_flush(struct mapped_device *md)
> +{
> +       dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> +       return 0;
> +}
> +

Always returns zero! Why not void?

<snip>

Thanks
Nikanth

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Alasdair G Kergon March 24, 2009, 2:24 p.m. UTC | #2
On Mon, Feb 23, 2009 at 02:23:00PM -0500, Mikulas Patocka wrote:
> DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
> barriers.
 
Didn't we discuss this before?
We *require* DM_ENDIO_REQUEUE functionality - I can't apply a patch that
breaks it.  But it is not something that can be used arbitrarily - it is
only used in situations where core dm is firmly in control (and I/O cannot get
through to the device e.g. all paths down in multipath) and I don't see why it
can't continue to work with barriers.

I hope it's just the comment that is wrong here - or does the patch need
updating?

Alasdair
Mikulas Patocka March 24, 2009, 2:30 p.m. UTC | #3
On Tue, 24 Mar 2009, Alasdair G Kergon wrote:

> On Mon, Feb 23, 2009 at 02:23:00PM -0500, Mikulas Patocka wrote:
> > DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
> > barriers.
>  
> Didn't we discuss this before?
> We *require* DM_ENDIO_REQUEUE functionality - I can't apply a patch that
> breaks it.  But it is not something that can be used arbitrarily - it is
> only used in situations where core dm is firmly in control (and I/O cannot get
> through to the device e.g. all paths down in multipath) and I don't see why it
> can't continue to work with barriers.
> 
> I hope it's just the comment that is wrong here - or does the patch need
> updating?

Oh, sorry, that is old comment for the old version of that patch. I fixed 
the code but forgot to remove the comment. The code in the patch should be 
correct w.r.t. DM_ENDIO_REQUEUE.

Mikulas

> Alasdair
> -- 
> agk@redhat.com
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox

Patch

Index: linux-2.6.29-rc6-devel/drivers/md/dm.c
===================================================================
--- linux-2.6.29-rc6-devel.orig/drivers/md/dm.c	2009-02-23 17:55:37.000000000 +0100
+++ linux-2.6.29-rc6-devel/drivers/md/dm.c	2009-02-23 17:55:45.000000000 +0100
@@ -125,6 +125,11 @@  struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
+	 * An error from the barrier request currently being processed.
+	 */
+	int barrier_error;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
@@ -425,6 +430,10 @@  static void end_io_acct(struct dm_io *io
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	/*
+	 * after this is decremented, the bio must not be touched if it is
+	 * barrier bio
+	 */
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);
 
@@ -525,19 +534,29 @@  static void dec_pending(struct dm_io *io
 			 */
 			spin_lock_irqsave(&io->md->deferred_lock, flags);
 			if (__noflush_suspending(io->md))
-				bio_list_add(&io->md->deferred, io->bio);
+				bio_list_add_head(&io->md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
 		}
 
-		end_io_acct(io);
+		if (bio_barrier(io->bio)) {
+			/*
+			 * There could be just one barrier request, so we use
+			 * per-device variable for error reporting is OK.
+			 * Note that you can't touch the bio after end_io_acct
+			 */
+			io->md->barrier_error = io->error;
+			end_io_acct(io);
+		} else {
+			end_io_acct(io);
 
-		if (io->error != DM_ENDIO_REQUEUE) {
-			trace_block_bio_complete(io->md->queue, io->bio);
+			if (io->error != DM_ENDIO_REQUEUE) {
+				trace_block_bio_complete(io->md->queue, io->bio);
 
-			bio_endio(io->bio, io->error);
+				bio_endio(io->bio, io->error);
+			}
 		}
 
 		free_io(io->md, io);
@@ -682,7 +701,7 @@  static struct bio *split_bvec(struct bio
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
+	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -703,6 +722,7 @@  static struct bio *clone_bio(struct bio 
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
+	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -823,7 +843,10 @@  static void __process_bio(struct mapped_
 
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map)) {
-		bio_io_error(bio);
+		if (!bio_barrier(bio))
+			bio_io_error(bio);
+		else
+			md->barrier_error = -EIO;
 		return;
 	}
 
@@ -911,11 +934,6 @@  static int dm_request(struct request_que
 	 * There is no use in forwarding any barrier request since we can't
 	 * guarantee it is (or can be) handled by the targets correctly.
 	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
@@ -927,7 +945,8 @@  static int dm_request(struct request_que
 	 * If we're suspended we have to queue
 	 * this io for later.
 	 */
-	if (unlikely(test_bit(DMF_BLOCK_IO, &md->flags))) {
+	if (unlikely(test_bit(DMF_BLOCK_IO, &md->flags)) ||
+	    unlikely(bio_barrier(bio))) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
@@ -1392,6 +1411,12 @@  static int dm_wait_for_completion(struct
 	return r;
 }
 
+static int dm_flush(struct mapped_device *md)
+{
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	return 0;
+}
+
 /*
  * Process the deferred bios
  */
@@ -1413,8 +1438,30 @@  static void dm_wq_work(struct work_struc
 		}
 		up_write(&md->io_lock);
 
-		__process_bio(md, c);
+		if (!bio_barrier(c))
+			__process_bio(md, c);
+		else {
+			int error = dm_flush(md);
+			if (unlikely(error)) {
+				bio_endio(c, error);
+				goto next_bio;
+			}
+			if (bio_empty_barrier(c)) {
+				bio_endio(c, 0);
+				goto next_bio;
+			}
+
+			__process_bio(md, c);
+
+			error = dm_flush(md);
+			if (!error && md->barrier_error)
+				error = md->barrier_error;
+
+			if (md->barrier_error != DM_ENDIO_REQUEUE)
+				bio_endio(c, error);
+		}
 
+next_bio:
 		down_write(&md->io_lock);
 	}