From fef246672b009cf3f7a74e2fc9a76932ef2eeed2 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:10 -0400 Subject: [PATCH 1/5] block: Make blk_queue_stack_limits use the new stacking interface blk_queue_stack_limits() has been superseded by blk_stack_limits() and disk_stack_limits(). Wrap the function call for now; we'll deprecate it later. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 8a3ea3bba10d..8e86e2d2b147 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -433,27 +433,7 @@ EXPORT_SYMBOL(blk_queue_io_opt); **/ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { - /* zero is "infinity" */ - t->limits.max_sectors = min_not_zero(queue_max_sectors(t), - queue_max_sectors(b)); - - t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t), - queue_max_hw_sectors(b)); - - t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t), - queue_segment_boundary(b)); - - t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t), - queue_max_phys_segments(b)); - - t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t), - queue_max_hw_segments(b)); - - t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t), - queue_max_segment_size(b)); - - t->limits.logical_block_size = max(queue_logical_block_size(t), - queue_logical_block_size(b)); + blk_stack_limits(&t->limits, &b->limits, 0); if (!t->queue_lock) WARN_ON_ONCE(1); From 7c958e32649e0c35801762878fb0b6da8c55a515 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:11 -0400 Subject: [PATCH 2/5] block: Add a wrapper for setting minimum request size without a queue Introduce blk_limits_io_min() and make blk_queue_io_min() call it. Signed-off-by: Mike Snitzer Signed-off-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 31 ++++++++++++++++++++++++------- include/linux/blkdev.h | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 8e86e2d2b147..1f7197434166 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -383,6 +383,29 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) } EXPORT_SYMBOL(blk_queue_alignment_offset); +/** + * blk_limits_io_min - set minimum request size for a device + * @limits: the queue limits + * @min: smallest I/O size in bytes + * + * Description: + * Some devices have an internal block size bigger than the reported + * hardware sector size. This function can be used to signal the + * smallest I/O the device can perform without incurring a performance + * penalty. + */ +void blk_limits_io_min(struct queue_limits *limits, unsigned int min) +{ + limits->io_min = min; + + if (limits->io_min < limits->logical_block_size) + limits->io_min = limits->logical_block_size; + + if (limits->io_min < limits->physical_block_size) + limits->io_min = limits->physical_block_size; +} +EXPORT_SYMBOL(blk_limits_io_min); + /** * blk_queue_io_min - set minimum request size for the queue * @q: the request queue for the device @@ -396,13 +419,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset); */ void blk_queue_io_min(struct request_queue *q, unsigned int min) { - q->limits.io_min = min; - - if (q->limits.io_min < q->limits.logical_block_size) - q->limits.io_min = q->limits.logical_block_size; - - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; + blk_limits_io_min(&q->limits, min); } EXPORT_SYMBOL(blk_queue_io_min); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e7cb5dbf6c26..69103e053c92 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -913,6 +913,7 @@ extern void blk_queue_logical_block_size(struct request_queue *, unsigned 
short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); From 70dd5bf3b99964d52862ad2810c24cc32a553535 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:12 -0400 Subject: [PATCH 3/5] block: Stack optimal I/O size When stacking block devices, ensure that optimal I/O size is scaled accordingly. Signed-off-by: Martin K. Petersen Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-settings.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 1f7197434166..e1327ddfc13b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -7,6 +7,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ +#include <linux/gcd.h> #include "blk.h" @@ -520,6 +521,16 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, return -1; } + /* Find lcm() of optimal I/O size */ + if (t->io_opt && b->io_opt) + t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt); + else if (b->io_opt) + t->io_opt = b->io_opt; + + /* Verify that optimal I/O size is a multiple of io_min */ + if (t->io_min && t->io_opt % t->io_min) + return -1; + return 0; } EXPORT_SYMBOL(blk_stack_limits); From 7e5f5fb09e6fc657f21816b5a18ba645a913368e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:13 -0400 Subject: [PATCH 4/5] block: Update topology documentation Update topology comments and sysfs documentation based upon discussions with Neil Brown. Signed-off-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 37 +++++++++++++++++---------- block/blk-settings.c | 19 +++++++++----- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index cbbd3e069945..5f3bedaf8e35 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -94,28 +94,37 @@ What: /sys/block/<disk>/queue/physical_block_size Date: May 2009 Contact: Martin K. Petersen Description: - This is the smallest unit the storage device can write - without resorting to read-modify-write operation. It is - usually the same as the logical block size but may be - bigger. One example is SATA drives with 4KB sectors - that expose a 512-byte logical block size to the - operating system. + This is the smallest unit a physical storage device can + write atomically. It is usually the same as the logical + block size but may be bigger. One example is SATA + drives with 4KB sectors that expose a 512-byte logical + block size to the operating system. For stacked block + devices the physical_block_size variable contains the + maximum physical_block_size of the component devices. What: /sys/block/<disk>/queue/minimum_io_size Date: April 2009 Contact: Martin K. Petersen Description: - Storage devices may report a preferred minimum I/O size, - which is the smallest request the device can perform - without incurring a read-modify-write penalty. For disk - drives this is often the physical block size. For RAID - arrays it is often the stripe chunk size. + Storage devices may report a granularity or preferred + minimum I/O size which is the smallest request the + device can perform without incurring a performance + penalty. For disk drives this is often the physical + block size. For RAID arrays it is often the stripe + chunk size.
A properly aligned multiple of + minimum_io_size is the preferred request size for + workloads where a high number of I/O operations is + desired. What: /sys/block//queue/optimal_io_size Date: April 2009 Contact: Martin K. Petersen Description: Storage devices may report an optimal I/O size, which is - the device's preferred unit of receiving I/O. This is - rarely reported for disk drives. For RAID devices it is - usually the stripe width or the internal block size. + the device's preferred unit for sustained I/O. This is + rarely reported for disk drives. For RAID arrays it is + usually the stripe width or the internal track size. A + properly aligned multiple of optimal_io_size is the + preferred request size for workloads where sustained + throughput is desired. If no optimal I/O size is + reported this file contains 0. diff --git a/block/blk-settings.c b/block/blk-settings.c index e1327ddfc13b..476d87065073 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -413,10 +413,13 @@ EXPORT_SYMBOL(blk_limits_io_min); * @min: smallest I/O size in bytes * * Description: - * Some devices have an internal block size bigger than the reported - * hardware sector size. This function can be used to signal the - * smallest I/O the device can perform without incurring a performance - * penalty. + * Storage devices may report a granularity or preferred minimum I/O + * size which is the smallest request the device can perform without + * incurring a performance penalty. For disk drives this is often the + * physical block size. For RAID arrays it is often the stripe chunk + * size. A properly aligned multiple of minimum_io_size is the + * preferred request size for workloads where a high number of I/O + * operations is desired. 
*/ void blk_queue_io_min(struct request_queue *q, unsigned int min) { @@ -430,8 +433,12 @@ EXPORT_SYMBOL(blk_queue_io_min); * @opt: optimal request size in bytes * * Description: - * Drivers can call this function to set the preferred I/O request - * size for devices that report such a value. + * Storage devices may report an optimal I/O size, which is the + * device's preferred unit for sustained I/O. This is rarely reported + * for disk drives. For RAID arrays it is usually the stripe width or + * the internal track size. A properly aligned multiple of + * optimal_io_size is the preferred request size for workloads where + * sustained throughput is desired. */ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) { From 14d9fa352592582e457cf75022202766baac1348 Mon Sep 17 00:00:00 2001 From: John Stoffel Date: Tue, 4 Aug 2009 22:10:17 +0200 Subject: [PATCH 5/5] Make SCSI SG v4 driver enabled by default and remove EXPERIMENTAL dependency, since udev depends on BSG Make Block Layer SG support v4 the default, since recent udev versions depend on this to access serial numbers and other low level info properly. This should be backported to older kernels as well, since most distros have enabled this for a long time. Signed-off-by: John Stoffel Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/Kconfig | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/Kconfig b/block/Kconfig index 95a86adc33a1..9be0b56eaee1 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -48,9 +48,9 @@ config LBDAF If unsure, say Y. config BLK_DEV_BSG - bool "Block layer SG support v4 (EXPERIMENTAL)" - depends on EXPERIMENTAL - ---help--- + bool "Block layer SG support v4" + default y + help Saying Y here will enable generic SG (SCSI generic) v4 support for any block device. @@ -60,7 +60,10 @@ config BLK_DEV_BSG protocols (e.g. Task Management Functions and SMP in Serial Attached SCSI). - If unsure, say N. 
+ This option is required by recent UDEV versions to properly + access device serial numbers, etc. + + If unsure, say Y. config BLK_DEV_INTEGRITY bool "Block layer data integrity support"