A set of device-mapper fixes for 3.12.

A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error
 handling fixes for dm-multipath.  A fix for the thin provisioning target
 to not expose non-zero discard limits if discards are disabled.
 
 Lastly, add two DM module parameters which allow users to tune the
 emergency memory reserves that DM mainatins per device -- this helps fix
 a long-standing issue for dm-multipath.  The conservative default
 reserve for request-based dm-multipath devices (256) has proven
 problematic for users with many multipathed SCSI devices but relatively
 little memory.  To responsibly select a smaller value users should use
 the new nr_bios tracepoint info (via commit 75afb352 "block: Add nr_bios
 to block_rq_remap tracepoint") to determine the peak number of bios
 their workloads create.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.14 (GNU/Linux)
 
 iQEcBAABAgAGBQJSQMVHAAoJEMUj8QotnQNaOXgIAJS6/XJKMoHfiDJ9M+XD34rZ
 Uyr9TEnubX3DKCRBiY23MUcCQn3fx6BjCGv5/c8L4jQFIuLyDi2yatqpwXcbGSJh
 G/S/y6u0Axek+ew7TS80OFop4nblW6MoKnoh9/4N55Ofa+1WvKM4ERUGjHGbauyS
 TxmLQPToCFPLYRIOZ+imd6hQuIZ1+FFdJFvi7kY9O6Llx2sLD6fWi1iruBd/Da2H
 ByMX3biGN45mSpcBzRbSC/FkJ9CRIvT9n82BDPS0o3Tllt8NaVlEDaovB7h4ncc0
 bFuT2Z3Q38B9uZ8Lj0bqdGzv3kXMLCkLo6WhWjyUt84hmDPAzRpBwt60jUqWyZs=
 =bjVp
 -----END PGP SIGNATURE-----

Merge tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device-mapper fixes from Mike Snitzer:
 "A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error
  handling fixes for dm-multipath.  A fix for the thin provisioning
  target to not expose non-zero discard limits if discards are disabled.

  Lastly, add two DM module parameters which allow users to tune the
  emergency memory reserves that DM mainatins per device -- this helps
  fix a long-standing issue for dm-multipath.  The conservative default
  reserve for request-based dm-multipath devices (256) has proven
  problematic for users with many multipathed SCSI devices but
  relatively little memory.  To responsibly select a smaller value users
  should use the new nr_bios tracepoint info (via commit 75afb352
  "block: Add nr_bios to block_rq_remap tracepoint") to determine the
  peak number of bios their workloads create"

* tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: add reserved_bio_based_ios module parameter
  dm: add reserved_rq_based_ios module parameter
  dm: lower bio-based mempool reservation
  dm thin: do not expose non-zero discard limits if discards disabled
  dm mpath: disable WRITE SAME if it fails
  dm-snapshot: fix performance degradation due to small hash size
  dm snapshot: workaround for a false positive lockdep warning
  dm stats: fix possible counter corruption on 32-bit systems
  dm mpath: do not fail path on -ENOSPC
This commit is contained in:
Linus Torvalds 2013-09-25 15:12:46 -07:00
commit e93dd910b9
9 changed files with 120 additions and 26 deletions

View file

@ -19,8 +19,6 @@
#define DM_MSG_PREFIX "io"
#define DM_IO_MAX_REGIONS BITS_PER_LONG
#define MIN_IOS 16
#define MIN_BIOS 16
struct dm_io_client {
mempool_t *pool;
@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
struct dm_io_client *dm_io_client_create(void)
{
struct dm_io_client *client;
unsigned min_ios = dm_get_reserved_bio_based_ios();
client = kmalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return ERR_PTR(-ENOMEM);
client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
if (!client->pool)
goto bad;
client->bios = bioset_create(MIN_BIOS, 0);
client->bios = bioset_create(min_ios, 0);
if (!client->bios)
goto bad;

View file

@ -7,6 +7,7 @@
#include <linux/device-mapper.h>
#include "dm.h"
#include "dm-path-selector.h"
#include "dm-uevent.h"
@ -116,8 +117,6 @@ struct dm_mpath_io {
typedef int (*action_fn) (struct pgpath *pgpath);
#define MIN_IOS 256 /* Mempool size */
static struct kmem_cache *_mpio_cache;
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
static struct multipath *alloc_multipath(struct dm_target *ti)
{
struct multipath *m;
unsigned min_ios = dm_get_reserved_rq_based_ios();
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
INIT_WORK(&m->trigger_event, trigger_event);
init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
return NULL;
@ -1268,6 +1268,7 @@ static int noretry_error(int error)
case -EREMOTEIO:
case -EILSEQ:
case -ENODATA:
case -ENOSPC:
return 1;
}
@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!error && !clone->errors)
return 0; /* I/O complete */
if (noretry_error(error))
if (noretry_error(error)) {
if ((clone->cmd_flags & REQ_WRITE_SAME) &&
!clone->q->limits.max_write_same_sectors) {
struct queue_limits *limits;
/* device doesn't really support WRITE SAME, disable it */
limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
limits->max_write_same_sectors = 0;
}
return error;
}
if (mpio->pgpath)
fail_path(mpio->pgpath);

View file

@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
*/
INIT_WORK_ONSTACK(&req.work, do_metadata);
queue_work(ps->metadata_wq, &req.work);
flush_work(&req.work);
flush_workqueue(ps->metadata_wq);
return req.result;
}

View file

@ -725,17 +725,16 @@ static int calc_max_buckets(void)
*/
static int init_hash_tables(struct dm_snapshot *s)
{
sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
sector_t hash_size, cow_dev_size, max_buckets;
/*
* Calculate based on the size of the original volume or
* the COW volume...
*/
cow_dev_size = get_dev_size(s->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = cow_dev_size >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
if (hash_size < 64)

View file

@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
struct dm_stat_percpu *p;
/*
* For strict correctness we should use local_irq_disable/enable
* For strict correctness we should use local_irq_save/restore
* instead of preempt_disable/enable.
*
* This is racy if the driver finishes bios from non-interrupt
* context as well as from interrupt context or from more different
* interrupts.
* preempt_disable/enable is racy if the driver finishes bios
* from non-interrupt context as well as from interrupt context
* or from more different interrupts.
*
* However, the race only results in not counting some events,
* so it is acceptable.
* On 64-bit architectures the race only results in not counting some
* events, so it is acceptable. On 32-bit architectures the race could
* cause the counter going off by 2^32, so we need to do proper locking
* there.
*
* part_stat_lock()/part_stat_unlock() have this race too.
*/
#if BITS_PER_LONG == 32
unsigned long flags;
local_irq_save(flags);
#else
preempt_disable();
#endif
p = &s->stat_percpu[smp_processor_id()][entry];
if (!end) {
@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
p->ticks[idx] += duration;
}
#if BITS_PER_LONG == 32
local_irq_restore(flags);
#else
preempt_enable();
#endif
}
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,

View file

@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* them down to the data device. The thin device's discard
* processing will cause mappings to be removed from the btree.
*/
ti->discard_zeroes_data_unsupported = true;
if (pf.discard_enabled && pf.discard_passdown) {
ti->num_discard_bios = 1;
@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* thin devices' discard limits consistent).
*/
ti->discards_supported = true;
ti->discard_zeroes_data_unsupported = true;
}
ti->private = pt;
@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
* They get transferred to the live pool in bind_control_target()
* called from pool_preresume().
*/
if (!pt->adjusted_pf.discard_enabled)
if (!pt->adjusted_pf.discard_enabled) {
/*
* Must explicitly disallow stacking discard limits otherwise the
* block layer will stack them if pool's data device has support.
* QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
* user to see that, so make sure to set all discard limits to 0.
*/
limits->discard_granularity = 0;
return;
}
disable_passdown_if_not_supported(pt);
@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
/* In case the pool supports discards, pass them on. */
ti->discard_zeroes_data_unsupported = true;
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
ti->discard_zeroes_data_unsupported = true;
/* Discard bios must be split on a block boundary */
ti->split_discard_bios = true;
}

View file

@ -211,10 +211,55 @@ struct dm_md_mempools {
struct bio_set *bs;
};
#define MIN_IOS 256
#define RESERVED_BIO_BASED_IOS 16
#define RESERVED_REQUEST_BASED_IOS 256
#define RESERVED_MAX_IOS 1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
/*
* Bio-based DM's mempools' reserved IOs set by the user.
*/
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
/*
* Request-based DM's mempools' reserved IOs set by the user.
*/
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
unsigned def, unsigned max)
{
unsigned ios = ACCESS_ONCE(*reserved_ios);
unsigned modified_ios = 0;
if (!ios)
modified_ios = def;
else if (ios > max)
modified_ios = max;
if (modified_ios) {
(void)cmpxchg(reserved_ios, ios, modified_ios);
ios = modified_ios;
}
return ios;
}
unsigned dm_get_reserved_bio_based_ios(void)
{
return __dm_get_reserved_ios(&reserved_bio_based_ios,
RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
unsigned dm_get_reserved_rq_based_ios(void)
{
return __dm_get_reserved_ios(&reserved_rq_based_ios,
RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
static int __init local_init(void)
{
int r = -ENOMEM;
@ -2277,6 +2322,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
return md->immutable_target_type;
}
/*
* The queue_limits are only valid as long as you have a reference
* count on 'md'.
*/
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
BUG_ON(!atomic_read(&md->holders));
return &md->queue->limits;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
/*
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
*/
@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
if (type == DM_TYPE_BIO_BASED) {
cachep = _io_cache;
pool_size = 16;
pool_size = dm_get_reserved_bio_based_ios();
front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
} else if (type == DM_TYPE_REQUEST_BASED) {
cachep = _rq_tio_cache;
pool_size = MIN_IOS;
pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_bio_data_size is not used. See __bind_mempools(). */
WARN_ON(per_bio_data_size != 0);
} else
goto out;
pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
if (!pools->io_pool)
goto out;
@ -2924,6 +2980,13 @@ module_exit(dm_exit);
module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");

View file

@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
* Helpers that are used by DM core
*/
unsigned dm_get_reserved_bio_based_ios(void);
unsigned dm_get_reserved_rq_based_ios(void);
static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
{
return !maxlen || strlen(result) + 1 >= maxlen;

View file

@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
union map_info *dm_get_mapinfo(struct bio *bio);
union map_info *dm_get_rq_mapinfo(struct request *rq);
struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
/*
* Geometry functions.
*/
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
/*-----------------------------------------------------------------
* Functions for manipulating device-mapper tables.
*---------------------------------------------------------------*/