From 5d1a536621403b442eef60ddd35e6b7a46fb04b7 Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:12 +0100
Subject: [PATCH 1/5] Deadline iosched: Factor out finding latter request

Factor finding the next request in sector-sorted order into a
function deadline_latter_request.

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 1a511ffaf8a4..a44437e5a94b 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -55,6 +55,20 @@ static void deadline_move_request(struct deadline_data *, struct request *);
 
 #define RQ_RB_ROOT(dd, rq)      (&(dd)->sort_list[rq_data_dir((rq))])
 
+/*
+ * get the request after `rq' in sector-sorted order
+ */
+static inline struct request *
+deadline_latter_request(struct request *rq)
+{
+        struct rb_node *node = rb_next(&rq->rb_node);
+
+        if (node)
+                return rb_entry_rq(node);
+
+        return NULL;
+}
+
 static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
@@ -74,13 +88,8 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
 {
         const int data_dir = rq_data_dir(rq);
 
-        if (dd->next_rq[data_dir] == rq) {
-                struct rb_node *rbnext = rb_next(&rq->rb_node);
-
-                dd->next_rq[data_dir] = NULL;
-                if (rbnext)
-                        dd->next_rq[data_dir] = rb_entry_rq(rbnext);
-        }
+        if (dd->next_rq[data_dir] == rq)
+                dd->next_rq[data_dir] = deadline_latter_request(rq);
 
         elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
 }
@@ -198,14 +207,11 @@ static void
 deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
         const int data_dir = rq_data_dir(rq);
-        struct rb_node *rbnext = rb_next(&rq->rb_node);
 
         dd->next_rq[READ] = NULL;
         dd->next_rq[WRITE] = NULL;
+        dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-        if (rbnext)
-                dd->next_rq[data_dir] = rb_entry_rq(rbnext);
-
         dd->last_sector = rq->sector + rq->nr_sectors;
 
         /*

From dfb3d72a9aa519672c9ae06f0d2f93eccb35482f Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:13 +0100
Subject: [PATCH 2/5] Deadline iosched: Reset batch for ordered requests

The deadline I/O scheduler does not reset the batch count when starting
a new batch at a higher-sectored request.  This means the second and
subsequent batches in the same data direction will never exceed a single
request in size whenever higher-sectored requests are pending.

This patch gives new batches in the same data direction as old ones
their full quota of requests by resetting the batch count.

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index a44437e5a94b..cb94c838087a 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -306,12 +306,11 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
 dispatch_find_request:
         /*
          * we are not running a batch, find best request for selected data_dir
+         * and start a new batch
          */
         if (deadline_check_fifo(dd, data_dir)) {
                 /* An expired request exists - satisfy it */
-                dd->batching = 0;
                 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
-
         } else if (dd->next_rq[data_dir]) {
                 /*
                  * The last req was the same dir and we have a next request in
@@ -325,12 +324,13 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
                  * higher-sectored requests. Go back to the lowest sectored
                  * request (1 way elevator) and start a new batch.
                  */
-                dd->batching = 0;
                 node = rb_first(&dd->sort_list[data_dir]);
                 if (node)
                         rq = rb_entry_rq(node);
         }
 
+        dd->batching = 0;
+
 dispatch_request:
         /*
          * rq is the selected appropriate request.
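The effect of the missing reset can be seen with a small standalone simulation (plain userspace C, not kernel code; the file name, constant and helper below are illustrative only). It models nothing but the dd->batching counter from deadline_dispatch_requests(), assuming a single data direction, requests already in sector-sorted order and no fifo deadline ever expiring:

/* batch_reset_sim.c - illustrative userspace model, not kernel code.
 *
 * A batch continues while batching < FIFO_BATCH and a higher-sectored
 * request is pending; otherwise control falls into the equivalent of
 * dispatch_find_request.  Assumes one data direction, requests already
 * in sector order and no fifo expiry.
 */
#include <stdio.h>

#define FIFO_BATCH 16 /* deadline's default fifo_batch */

static void simulate(int nr_requests, int reset_on_new_batch)
{
        int batching = 0;  /* dd->batching */
        int have_next = 0; /* dd->next_rq[data_dir] != NULL */
        int batches = 0, batch_size = 0;

        for (int i = 0; i < nr_requests; i++) {
                if (!have_next || batching >= FIFO_BATCH) {
                        /* dispatch_find_request: pick a new batch start */
                        if (batches)
                                printf("  batch %d: %d request(s)\n",
                                       batches, batch_size);
                        batches++;
                        batch_size = 0;
                        /* the old code reset batching only when falling back
                         * to rb_first() (!have_next); the patch resets it for
                         * every new batch */
                        if (reset_on_new_batch || !have_next)
                                batching = 0;
                }
                /* dispatch_request: issue one request from the batch */
                batching++;
                batch_size++;
                have_next = (i + 1 < nr_requests);
        }
        printf("  batch %d: %d request(s), %d batches total\n",
               batches, batch_size, batches);
}

int main(void)
{
        printf("without reset on same-direction batches (pre-patch):\n");
        simulate(40, 0);
        printf("with unconditional reset (patched):\n");
        simulate(40, 1);
        return 0;
}

With 40 pending requests the unpatched model issues one 16-request batch followed by 24 single-request batches, while the patched model issues batches of 16, 16 and 8.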
From 6f5d8aa6382eef2b26032c88656270bdae7f0c42 Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:13 +0100
Subject: [PATCH 3/5] Deadline iosched: Fix batching fairness

After switching data directions, deadline always starts the next batch
from the lowest-sector request.  This gives excessive deadline expiries
and large latency and throughput disparity between high- and low-sector
requests; an order of magnitude in some tests.

This patch changes the batching behaviour so new batches start from the
request whose expiry is earliest.

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index cb94c838087a..a054eef8dff6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -306,27 +306,20 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
 dispatch_find_request:
         /*
          * we are not running a batch, find best request for selected data_dir
-         * and start a new batch
          */
-        if (deadline_check_fifo(dd, data_dir)) {
-                /* An expired request exists - satisfy it */
+        if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
+                /*
+                 * A deadline has expired, the last request was in the other
+                 * direction, or we have run out of higher-sectored requests.
+                 * Start again from the request with the earliest expiry time.
+                 */
                 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
-        } else if (dd->next_rq[data_dir]) {
+        } else {
                 /*
                  * The last req was the same dir and we have a next request in
                  * sort order. No expired requests so continue on from here.
                  */
                 rq = dd->next_rq[data_dir];
-        } else {
-                struct rb_node *node;
-                /*
-                 * The last req was the other direction or we have run out of
-                 * higher-sectored requests. Go back to the lowest sectored
-                 * request (1 way elevator) and start a new batch.
-                 */
-                node = rb_first(&dd->sort_list[data_dir]);
-                if (node)
-                        rq = rb_entry_rq(node);
         }
 
         dd->batching = 0;
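The new selection rule amounts to a few lines: restart from the fifo head, i.e. the request with the earliest expiry, whenever a deadline has expired or there is no higher-sectored request to continue with, rather than falling back to the lowest sector via rb_first(). A standalone sketch of just that decision follows (illustrative userspace C; the struct and function names are made up and are not kernel APIs):

/* batch_start_pick.c - illustrative userspace sketch, not kernel code. */
#include <stdio.h>
#include <stddef.h>

struct req {
        long sector;
        long expiry; /* jiffies-like deadline */
};

/*
 * Mirrors the patched dispatch_find_request logic: if a deadline has
 * expired, the previous request went the other way, or we have run out
 * of higher-sectored requests, restart from the fifo head (earliest
 * expiry); otherwise keep walking in sector order from next_rq.
 */
static struct req *pick_batch_start(struct req *fifo_head,
                                    struct req *next_rq, long now)
{
        int fifo_expired = fifo_head && fifo_head->expiry <= now;

        if (fifo_expired || !next_rq)
                return fifo_head;
        return next_rq;
}

int main(void)
{
        struct req oldest = { .sector = 900, .expiry = 90 };
        struct req *rq;

        /* direction switch: no next_rq, so the new batch starts at the
         * earliest-expiring request (sector 900), not at the lowest
         * sector as the pre-patch rb_first() fallback would choose */
        rq = pick_batch_start(&oldest, NULL, 100);
        printf("new batch starts at sector %ld (expiry %ld)\n",
               rq->sector, rq->expiry);
        return 0;
}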
From 5ec140e600b7d6624c657f008833f0e71bd5ef48 Mon Sep 17 00:00:00 2001
From: Vasily Averin
Date: Wed, 31 Oct 2007 08:33:24 +0100
Subject: [PATCH 4/5] dm: bounce_pfn limit added

Device mapper uses its own bounce_pfn that may differ from the one on
the underlying device.  As a result, dm can build incorrect requests
that contain sg elements greater than the underlying device is able to
handle.

This is the cause of slab corruption in the i2o layer, observed on i386
when very long direct I/O requests are addressed to a dm-over-i2o
device.

Signed-off-by: Vasily Averin
Cc:
Cc: Alasdair G Kergon
Signed-off-by: Andrew Morton
Signed-off-by: Jens Axboe
---
 drivers/md/dm-table.c         | 7 +++++++
 include/linux/device-mapper.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8939e6105088..5a7eb650181e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -102,6 +102,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
         lhs->seg_boundary_mask =
                 min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
 
+        lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
+
         lhs->no_cluster |= rhs->no_cluster;
 }
 
@@ -566,6 +568,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
                 min_not_zero(rs->seg_boundary_mask,
                              q->seg_boundary_mask);
 
+        rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+
         rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@ -707,6 +711,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
                 rs->max_segment_size = MAX_SEGMENT_SIZE;
         if (!rs->seg_boundary_mask)
                 rs->seg_boundary_mask = -1;
+        if (!rs->bounce_pfn)
+                rs->bounce_pfn = -1;
 }
 
 int dm_table_add_target(struct dm_table *t, const char *type,
@@ -891,6 +897,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
         q->hardsect_size = t->limits.hardsect_size;
         q->max_segment_size = t->limits.max_segment_size;
         q->seg_boundary_mask = t->limits.seg_boundary_mask;
+        q->bounce_pfn = t->limits.bounce_pfn;
         if (t->limits.no_cluster)
                 q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
         else
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 37c66d1254b5..b8b7c51389fe 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -116,6 +116,7 @@ struct io_restrictions {
         unsigned short          hardsect_size;
         unsigned int            max_segment_size;
         unsigned long           seg_boundary_mask;
+        unsigned long           bounce_pfn;
         unsigned char           no_cluster; /* inverted so that 0 is default */
 };
 

From 51fd77bd9f512ab6cc9df0733ba1caaab89eb957 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 2 Nov 2007 08:49:08 +0100
Subject: [PATCH 5/5] [BLOCK] Don't allow empty barriers to be passed down to
 queues that don't grok them

Signed-off-by: Jens Axboe
---
 block/ll_rw_blk.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 56f2646612e6..3e4e8bfbe856 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3221,6 +3221,7 @@ static inline void __generic_make_request(struct bio *bio)
         sector_t old_sector;
         int ret, nr_sectors = bio_sectors(bio);
         dev_t old_dev;
+        int err = -EIO;
 
         might_sleep();
 
@@ -3248,7 +3249,7 @@ static inline void __generic_make_request(struct bio *bio)
                                 bdevname(bio->bi_bdev, b),
                                 (long long) bio->bi_sector);
 end_io:
-                        bio_endio(bio, -EIO);
+                        bio_endio(bio, err);
                         break;
                 }
 
@@ -3283,6 +3284,10 @@ static inline void __generic_make_request(struct bio *bio)
                 if (bio_check_eod(bio, nr_sectors))
                         goto end_io;
 
+                if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+                        err = -EOPNOTSUPP;
+                        goto end_io;
+                }
                 ret = q->make_request_fn(q, bio);
         } while (ret);
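The new bounce_pfn limit in patch 4 is stacked with the same min_not_zero() pattern used for the other io_restrictions fields: an unset limit (zero) is simply replaced, otherwise the lower of the two wins. A standalone sketch of that combination rule (illustrative userspace C with made-up values; not the dm code itself):

/* bounce_pfn_combine.c - illustrative userspace sketch, not dm code. */
#include <stdio.h>

/* same semantics as the kernel's min_not_zero(): 0 means "not set yet" */
#define min_not_zero(a, b) \
        ((a) == 0 ? (b) : ((b) == 0 ? (a) : ((a) < (b) ? (a) : (b))))

struct limits {
        unsigned long bounce_pfn; /* highest page frame reachable without bouncing */
};

static void stack_limit(struct limits *table, const struct limits *queue)
{
        /* keep the most restrictive (lowest) non-zero limit, as the patch
         * does in combine_restrictions_low() and dm_set_device_limits() */
        table->bounce_pfn = min_not_zero(table->bounce_pfn, queue->bounce_pfn);
}

int main(void)
{
        struct limits table = { .bounce_pfn = 0 };       /* dm table: unset */
        struct limits sata  = { .bounce_pfn = 1048576 }; /* example: ~4GB in 4K pages */
        struct limits i2o   = { .bounce_pfn = 262144 };  /* example: ~1GB in 4K pages */

        stack_limit(&table, &sata);
        stack_limit(&table, &i2o);

        /* the stacked queue now carries the i2o device's lower limit */
        printf("combined bounce_pfn: %lu\n", table.bounce_pfn);
        return 0;
}

After combining, the table carries the i2o queue's lower bounce_pfn, so requests above that page frame are bounced instead of being passed straight through, which is what dm_table_set_restrictions() arranges by copying the limit onto the dm queue.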