Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm snapshot: extend exception store functions
  dm snapshot: split out exception store implementations
  dm snapshot: rename struct exception_store
  dm snapshot: separate out exception store interface
  dm mpath: move trigger_event to system workqueue
  dm: add name and uuid to sysfs
  dm table: rework reference counting
  dm: support barriers on simple devices
  dm request: extend target interface
  dm request: add caches
  dm ioctl: allow dm_copy_name_and_uuid to return only one field
  dm log: ensure log bitmap fits on log device
  dm log: move region_size validation
  dm log: avoid reinitialising io_req on every operation
  dm: consolidate target deregistration error handling
  dm raid1: fix error count
  dm log: fix dm_io_client leak on error paths
  dm snapshot: change yield to msleep
  dm table: drop reference at unbind
This commit is contained in:
Linus Torvalds 2009-01-05 19:20:59 -08:00
commit 238c6d5483
22 changed files with 1321 additions and 985 deletions

View file

@ -3,9 +3,10 @@
#
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-objs := dm-raid1.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \

View file

@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
static void __exit dm_crypt_exit(void)
{
int r = dm_unregister_target(&crypt_target);
if (r < 0)
DMERR("unregister failed %d", r);
dm_unregister_target(&crypt_target);
kmem_cache_destroy(_crypt_io_pool);
}

View file

@ -364,11 +364,7 @@ static int __init dm_delay_init(void)
static void __exit dm_delay_exit(void)
{
int r = dm_unregister_target(&delay_target);
if (r < 0)
DMERR("unregister failed %d", r);
dm_unregister_target(&delay_target);
kmem_cache_destroy(delayed_cache);
destroy_workqueue(kdelayd_wq);
}

View file

@ -1,756 +1,45 @@
/*
* dm-exception-store.c
*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
* Copyright (C) 2006 Red Hat GmbH
* Copyright (C) 2006-2008 Red Hat GmbH
*
* This file is released under the GPL.
*/
#include "dm-snap.h"
#include "dm-exception-store.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#define DM_MSG_PREFIX "snapshots"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
#define DM_MSG_PREFIX "snapshot exception stores"
/*-----------------------------------------------------------------
* Persistent snapshots, by persistent we mean that the snapshot
* will survive a reboot.
*---------------------------------------------------------------*/
/*
* We need to store a record of which parts of the origin have
* been copied to the snapshot device. The snapshot code
* requires that we copy exception chunks to chunk aligned areas
* of the COW store. It makes sense therefore, to store the
* metadata in chunk size blocks.
*
* There is no backward or forward compatibility implemented,
* snapshots with different disk versions than the kernel will
* not be usable. It is expected that "lvcreate" will blank out
* the start of a fresh COW device before calling the snapshot
* constructor.
*
* The first chunk of the COW device just contains the header.
* After this there is a chunk filled with exception metadata,
* followed by as many exception chunks as can fit in the
* metadata areas.
*
* All on disk structures are in little-endian format. The end
* of the exceptions info is indicated by an exception with a
* new_chunk of 0, which is invalid since it would point to the
* header chunk.
*/
/*
* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
*/
#define SNAP_MAGIC 0x70416e53
/*
* The on-disk version of the metadata.
*/
#define SNAPSHOT_DISK_VERSION 1
struct disk_header {
uint32_t magic;
/*
* Is this snapshot valid. There is no way of recovering
* an invalid snapshot.
*/
uint32_t valid;
/*
* Simple, incrementing version. no backward
* compatibility.
*/
uint32_t version;
/* In sectors */
uint32_t chunk_size;
};
struct disk_exception {
uint64_t old_chunk;
uint64_t new_chunk;
};
struct commit_callback {
void (*callback)(void *, int success);
void *context;
};
/*
* The top level structure for a persistent exception store.
*/
struct pstore {
struct dm_snapshot *snap; /* up pointer to my snapshot */
int version;
int valid;
uint32_t exceptions_per_area;
/*
* Now that we have an asynchronous kcopyd there is no
* need for large chunk sizes, so it wont hurt to have a
* whole chunks worth of metadata in memory at once.
*/
void *area;
/*
* An area of zeros used to clear the next area.
*/
void *zero_area;
/*
* Used to keep track of which metadata area the data in
* 'chunk' refers to.
*/
chunk_t current_area;
/*
* The next free chunk for an exception.
*/
chunk_t next_free;
/*
* The index of next free exception in the current
* metadata area.
*/
uint32_t current_committed;
atomic_t pending_count;
uint32_t callback_count;
struct commit_callback *callbacks;
struct dm_io_client *io_client;
struct workqueue_struct *metadata_wq;
};
static unsigned sectors_to_pages(unsigned sectors)
{
return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
}
static int alloc_area(struct pstore *ps)
{
int r = -ENOMEM;
size_t len;
len = ps->snap->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
* a single metadata area.
*/
ps->area = vmalloc(len);
if (!ps->area)
return r;
ps->zero_area = vmalloc(len);
if (!ps->zero_area) {
vfree(ps->area);
return r;
}
memset(ps->zero_area, 0, len);
return 0;
}
static void free_area(struct pstore *ps)
{
vfree(ps->area);
ps->area = NULL;
vfree(ps->zero_area);
ps->zero_area = NULL;
}
struct mdata_req {
struct dm_io_region *where;
struct dm_io_request *io_req;
struct work_struct work;
int result;
};
static void do_metadata(struct work_struct *work)
{
struct mdata_req *req = container_of(work, struct mdata_req, work);
req->result = dm_io(req->io_req, 1, req->where, NULL);
}
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * chunk,
.count = ps->snap->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->area,
.client = ps->io_client,
.notify.fn = NULL,
};
struct mdata_req req;
if (!metadata)
return dm_io(&io_req, 1, &where, NULL);
req.where = &where;
req.io_req = &io_req;
/*
* Issue the synchronous I/O from a different thread
* to avoid generic_make_request recursion.
*/
INIT_WORK(&req.work, do_metadata);
queue_work(ps->metadata_wq, &req.work);
flush_workqueue(ps->metadata_wq);
return req.result;
}
/*
* Convert a metadata area index to a chunk index.
*/
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
return 1 + ((ps->exceptions_per_area + 1) * area);
}
/*
* Read or write a metadata area. Remembering to skip the first
* chunk which holds the header.
*/
static int area_io(struct pstore *ps, int rw)
int dm_exception_store_init(void)
{
int r;
chunk_t chunk;
chunk = area_location(ps, ps->current_area);
r = dm_transient_snapshot_init();
if (r) {
DMERR("Unable to register transient exception store type.");
goto transient_fail;
}
r = chunk_io(ps, chunk, rw, 0);
if (r)
return r;
r = dm_persistent_snapshot_init();
if (r) {
DMERR("Unable to register persistent exception store type");
goto persistent_fail;
}
return 0;
}
static void zero_memory_area(struct pstore *ps)
{
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * area_location(ps, area),
.count = ps->snap->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->zero_area,
.client = ps->io_client,
.notify.fn = NULL,
};
return dm_io(&io_req, 1, &where, NULL);
}
static int read_header(struct pstore *ps, int *new_snapshot)
{
int r;
struct disk_header *dh;
chunk_t chunk_size;
int chunk_size_supplied = 1;
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
if (!ps->snap->chunk_size) {
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
chunk_size_supplied = 0;
}
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
r = alloc_area(ps);
if (r)
return r;
r = chunk_io(ps, 0, READ, 1);
if (r)
goto bad;
dh = (struct disk_header *) ps->area;
if (le32_to_cpu(dh->magic) == 0) {
*new_snapshot = 1;
return 0;
}
if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
DMWARN("Invalid or corrupt snapshot");
r = -ENXIO;
goto bad;
}
*new_snapshot = 0;
ps->valid = le32_to_cpu(dh->valid);
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->snap->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
ps->snap->chunk_size = chunk_size;
ps->snap->chunk_mask = chunk_size - 1;
ps->snap->chunk_shift = ffs(chunk_size) - 1;
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
ps->io_client);
if (r)
return r;
r = alloc_area(ps);
return r;
bad:
free_area(ps);
persistent_fail:
dm_persistent_snapshot_exit();
transient_fail:
return r;
}
static int write_header(struct pstore *ps)
void dm_exception_store_exit(void)
{
struct disk_header *dh;
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
/*
* Access functions for the disk exceptions, these do the endian conversions.
*/
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
{
BUG_ON(index >= ps->exceptions_per_area);
return ((struct disk_exception *) ps->area) + index;
}
static void read_exception(struct pstore *ps,
uint32_t index, struct disk_exception *result)
{
struct disk_exception *e = get_exception(ps, index);
/* copy it */
result->old_chunk = le64_to_cpu(e->old_chunk);
result->new_chunk = le64_to_cpu(e->new_chunk);
}
static void write_exception(struct pstore *ps,
uint32_t index, struct disk_exception *de)
{
struct disk_exception *e = get_exception(ps, index);
/* copy it */
e->old_chunk = cpu_to_le64(de->old_chunk);
e->new_chunk = cpu_to_le64(de->new_chunk);
}
/*
* Registers the exceptions that are present in the current area.
* 'full' is filled in to indicate if the area has been
* filled.
*/
static int insert_exceptions(struct pstore *ps, int *full)
{
int r;
unsigned int i;
struct disk_exception de;
/* presume the area is full */
*full = 1;
for (i = 0; i < ps->exceptions_per_area; i++) {
read_exception(ps, i, &de);
/*
* If the new_chunk is pointing at the start of
* the COW device, where the first metadata area
* is we know that we've hit the end of the
* exceptions. Therefore the area is not full.
*/
if (de.new_chunk == 0LL) {
ps->current_committed = i;
*full = 0;
break;
}
/*
* Keep track of the start of the free chunks.
*/
if (ps->next_free <= de.new_chunk)
ps->next_free = de.new_chunk + 1;
/*
* Otherwise we add the exception to the snapshot.
*/
r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
if (r)
return r;
}
return 0;
}
static int read_exceptions(struct pstore *ps)
{
int r, full = 1;
/*
* Keeping reading chunks and inserting exceptions until
* we find a partially full area.
*/
for (ps->current_area = 0; full; ps->current_area++) {
r = area_io(ps, READ);
if (r)
return r;
r = insert_exceptions(ps, &full);
if (r)
return r;
}
ps->current_area--;
return 0;
}
static struct pstore *get_info(struct exception_store *store)
{
return (struct pstore *) store->context;
}
static void persistent_fraction_full(struct exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = get_info(store)->next_free * store->snap->chunk_size;
*denominator = get_dev_size(store->snap->cow->bdev);
}
static void persistent_destroy(struct exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
dm_io_client_destroy(ps->io_client);
vfree(ps->callbacks);
free_area(ps);
kfree(ps);
}
static int persistent_read_metadata(struct exception_store *store)
{
int r, uninitialized_var(new_snapshot);
struct pstore *ps = get_info(store);
/*
* Read the snapshot header.
*/
r = read_header(ps, &new_snapshot);
if (r)
return r;
/*
* Now we know correct chunk_size, complete the initialisation.
*/
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
if (!ps->callbacks)
return -ENOMEM;
/*
* Do we need to setup a new snapshot ?
*/
if (new_snapshot) {
r = write_header(ps);
if (r) {
DMWARN("write_header failed");
return r;
}
ps->current_area = 0;
zero_memory_area(ps);
r = zero_disk_area(ps, 0);
if (r) {
DMWARN("zero_disk_area(0) failed");
return r;
}
} else {
/*
* Sanity checks.
*/
if (ps->version != SNAPSHOT_DISK_VERSION) {
DMWARN("unable to handle snapshot disk version %d",
ps->version);
return -EINVAL;
}
/*
* Metadata are valid, but snapshot is invalidated
*/
if (!ps->valid)
return 1;
/*
* Read the metadata.
*/
r = read_exceptions(ps);
if (r)
return r;
}
return 0;
}
static int persistent_prepare(struct exception_store *store,
struct dm_snap_exception *e)
{
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
sector_t size = get_dev_size(store->snap->cow->bdev);
/* Is there enough room ? */
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
/*
* Move onto the next free pending, making sure to take
* into account the location of the metadata chunks.
*/
stride = (ps->exceptions_per_area + 1);
next_free = ++ps->next_free;
if (sector_div(next_free, stride) == 1)
ps->next_free++;
atomic_inc(&ps->pending_count);
return 0;
}
static void persistent_commit(struct exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context)
{
unsigned int i;
struct pstore *ps = get_info(store);
struct disk_exception de;
struct commit_callback *cb;
de.old_chunk = e->old_chunk;
de.new_chunk = e->new_chunk;
write_exception(ps, ps->current_committed++, &de);
/*
* Add the callback to the back of the array. This code
* is the only place where the callback array is
* manipulated, and we know that it will never be called
* multiple times concurrently.
*/
cb = ps->callbacks + ps->callback_count++;
cb->callback = callback;
cb->context = callback_context;
/*
* If there are exceptions in flight and we have not yet
* filled this metadata area there's nothing more to do.
*/
if (!atomic_dec_and_test(&ps->pending_count) &&
(ps->current_committed != ps->exceptions_per_area))
return;
/*
* If we completely filled the current area, then wipe the next one.
*/
if ((ps->current_committed == ps->exceptions_per_area) &&
zero_disk_area(ps, ps->current_area + 1))
ps->valid = 0;
/*
* Commit exceptions to disk.
*/
if (ps->valid && area_io(ps, WRITE))
ps->valid = 0;
/*
* Advance to the next area if this one is full.
*/
if (ps->current_committed == ps->exceptions_per_area) {
ps->current_committed = 0;
ps->current_area++;
zero_memory_area(ps);
}
for (i = 0; i < ps->callback_count; i++) {
cb = ps->callbacks + i;
cb->callback(cb->context, ps->valid);
}
ps->callback_count = 0;
}
static void persistent_drop(struct exception_store *store)
{
struct pstore *ps = get_info(store);
ps->valid = 0;
if (write_header(ps))
DMWARN("write header failed");
}
int dm_create_persistent(struct exception_store *store)
{
struct pstore *ps;
/* allocate the pstore */
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
ps->snap = store->snap;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
ps->next_free = 2; /* skipping the header and first area */
ps->current_committed = 0;
ps->callback_count = 0;
atomic_set(&ps->pending_count, 0);
ps->callbacks = NULL;
ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
if (!ps->metadata_wq) {
kfree(ps);
DMERR("couldn't start header metadata update thread");
return -ENOMEM;
}
store->destroy = persistent_destroy;
store->read_metadata = persistent_read_metadata;
store->prepare_exception = persistent_prepare;
store->commit_exception = persistent_commit;
store->drop_snapshot = persistent_drop;
store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
/*-----------------------------------------------------------------
* Implementation of the store for non-persistent snapshots.
*---------------------------------------------------------------*/
struct transient_c {
sector_t next_free;
};
static void transient_destroy(struct exception_store *store)
{
kfree(store->context);
}
static int transient_read_metadata(struct exception_store *store)
{
return 0;
}
static int transient_prepare(struct exception_store *store,
struct dm_snap_exception *e)
{
struct transient_c *tc = (struct transient_c *) store->context;
sector_t size = get_dev_size(store->snap->cow->bdev);
if (size < (tc->next_free + store->snap->chunk_size))
return -1;
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
tc->next_free += store->snap->chunk_size;
return 0;
}
static void transient_commit(struct exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context)
{
/* Just succeed */
callback(callback_context, 1);
}
static void transient_fraction_full(struct exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
*denominator = get_dev_size(store->snap->cow->bdev);
}
int dm_create_transient(struct exception_store *store)
{
struct transient_c *tc;
store->destroy = transient_destroy;
store->read_metadata = transient_read_metadata;
store->prepare_exception = transient_prepare;
store->commit_exception = transient_commit;
store->drop_snapshot = NULL;
store->fraction_full = transient_fraction_full;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
tc->next_free = 0;
store->context = tc;
return 0;
dm_persistent_snapshot_exit();
dm_transient_snapshot_exit();
}

View file

@ -0,0 +1,148 @@
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
* Copyright (C) 2008 Red Hat, Inc. All rights reserved.
*
* Device-mapper snapshot exception store.
*
* This file is released under the GPL.
*/
#ifndef _LINUX_DM_EXCEPTION_STORE
#define _LINUX_DM_EXCEPTION_STORE
#include <linux/blkdev.h>
#include <linux/device-mapper.h>
/*
* The snapshot code deals with largish chunks of the disk at a
* time. Typically 32k - 512k.
*/
typedef sector_t chunk_t;
/*
* An exception is used where an old chunk of data has been
* replaced by a new one.
* If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
* of chunks that follow contiguously. Remaining bits hold the number of the
* chunk within the device.
*/
struct dm_snap_exception {
struct list_head hash_list;
chunk_t old_chunk;
chunk_t new_chunk;
};
/*
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
struct dm_exception_store {
/*
* Destroys this object when you've finished with it.
*/
void (*destroy) (struct dm_exception_store *store);
/*
* The target shouldn't read the COW device until this is
* called. As exceptions are read from the COW, they are
* reported back via the callback.
*/
int (*read_metadata) (struct dm_exception_store *store,
int (*callback)(void *callback_context,
chunk_t old, chunk_t new),
void *callback_context);
/*
* Find somewhere to store the next exception.
*/
int (*prepare_exception) (struct dm_exception_store *store,
struct dm_snap_exception *e);
/*
* Update the metadata with this exception.
*/
void (*commit_exception) (struct dm_exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context);
/*
* The snapshot is invalid, note this in the metadata.
*/
void (*drop_snapshot) (struct dm_exception_store *store);
int (*status) (struct dm_exception_store *store, status_type_t status,
char *result, unsigned int maxlen);
/*
* Return how full the snapshot is.
*/
void (*fraction_full) (struct dm_exception_store *store,
sector_t *numerator,
sector_t *denominator);
struct dm_snapshot *snap;
void *context;
};
/*
* Funtions to manipulate consecutive chunks
*/
# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
# define DM_CHUNK_CONSECUTIVE_BITS 8
# define DM_CHUNK_NUMBER_BITS 56
static inline chunk_t dm_chunk_number(chunk_t chunk)
{
return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
}
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
{
return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
}
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
{
e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
BUG_ON(!dm_consecutive_chunk_count(e));
}
# else
# define DM_CHUNK_CONSECUTIVE_BITS 0
static inline chunk_t dm_chunk_number(chunk_t chunk)
{
return chunk;
}
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
{
return 0;
}
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
{
}
# endif
int dm_exception_store_init(void);
void dm_exception_store_exit(void);
/*
* Two exception store implementations.
*/
int dm_persistent_snapshot_init(void);
void dm_persistent_snapshot_exit(void);
int dm_transient_snapshot_init(void);
void dm_transient_snapshot_exit(void);
int dm_create_persistent(struct dm_exception_store *store);
int dm_create_transient(struct dm_exception_store *store);
#endif /* _LINUX_DM_EXCEPTION_STORE */

View file

@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
}
if (hc->new_map)
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
dm_put(hc->md);
free_cell(hc);
}
@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
r = dm_swap_table(md, new_map);
if (r) {
dm_table_destroy(new_map);
dm_put(md);
dm_table_put(new_map);
return r;
}
@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
set_disk_ro(dm_disk(md), 0);
else
set_disk_ro(dm_disk(md), 1);
dm_table_put(new_map);
}
if (dm_suspended(md))
@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
}
if (hc->new_map)
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
hc->new_map = t;
up_write(&_hash_lock);
@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
}
if (hc->new_map) {
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
hc->new_map = NULL;
}
@ -1550,8 +1548,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
goto out;
}
strcpy(name, hc->name);
strcpy(uuid, hc->uuid ? : "");
if (name)
strcpy(name, hc->name);
if (uuid)
strcpy(uuid, hc->uuid ? : "");
out:
up_read(&_hash_lock);

View file

@ -142,6 +142,7 @@ static struct target_type linear_target = {
.status = linear_status,
.ioctl = linear_ioctl,
.merge = linear_merge,
.features = DM_TARGET_SUPPORTS_BARRIERS,
};
int __init dm_linear_init(void)
@ -156,8 +157,5 @@ int __init dm_linear_init(void)
void dm_linear_exit(void)
{
int r = dm_unregister_target(&linear_target);
if (r < 0)
DMERR("unregister failed %d", r);
dm_unregister_target(&linear_target);
}

View file

@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
static int rw_header(struct log_c *lc, int rw)
{
lc->io_req.bi_rw = rw;
lc->io_req.mem.ptr.vma = lc->disk_header;
lc->io_req.notify.fn = NULL;
return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
}
@ -362,10 +360,15 @@ static int read_header(struct log_c *log)
return 0;
}
static inline int write_header(struct log_c *log)
static int _check_region_size(struct dm_target *ti, uint32_t region_size)
{
header_to_disk(&log->header, log->disk_header);
return rw_header(log, WRITE);
if (region_size < 2 || region_size > ti->len)
return 0;
if (!is_power_of_2(region_size))
return 0;
return 1;
}
/*----------------------------------------------------------------
@ -403,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
}
}
if (sscanf(argv[0], "%u", &region_size) != 1) {
DMWARN("invalid region size string");
if (sscanf(argv[0], "%u", &region_size) != 1 ||
!_check_region_size(ti, region_size)) {
DMWARN("invalid region size %s", argv[0]);
return -EINVAL;
}
@ -453,8 +457,18 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
*/
buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
bitset_size, ti->limits.hardsect_size);
if (buf_size > dev->bdev->bd_inode->i_size) {
DMWARN("log device %s too small: need %llu bytes",
dev->name, (unsigned long long)buf_size);
kfree(lc);
return -EINVAL;
}
lc->header_location.count = buf_size >> SECTOR_SHIFT;
lc->io_req.mem.type = DM_IO_VMA;
lc->io_req.notify.fn = NULL;
lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
PAGE_SIZE));
if (IS_ERR(lc->io_req.client)) {
@ -467,10 +481,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
lc->disk_header = vmalloc(buf_size);
if (!lc->disk_header) {
DMWARN("couldn't allocate disk log buffer");
dm_io_client_destroy(lc->io_req.client);
kfree(lc);
return -ENOMEM;
}
lc->io_req.mem.ptr.vma = lc->disk_header;
lc->clean_bits = (void *)lc->disk_header +
(LOG_OFFSET << SECTOR_SHIFT);
}
@ -482,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
DMWARN("couldn't allocate sync bitset");
if (!dev)
vfree(lc->clean_bits);
else
dm_io_client_destroy(lc->io_req.client);
vfree(lc->disk_header);
kfree(lc);
return -ENOMEM;
@ -495,6 +513,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
vfree(lc->sync_bits);
if (!dev)
vfree(lc->clean_bits);
else
dm_io_client_destroy(lc->io_req.client);
vfree(lc->disk_header);
kfree(lc);
return -ENOMEM;
@ -631,8 +651,10 @@ static int disk_resume(struct dm_dirty_log *log)
/* set the correct number of regions in the header */
lc->header.nr_regions = lc->region_count;
header_to_disk(&lc->header, lc->disk_header);
/* write the new header */
r = write_header(lc);
r = rw_header(lc, WRITE);
if (r) {
DMWARN("%s: Failed to write header on dirty region log device",
lc->log_dev->name);
@ -682,7 +704,7 @@ static int disk_flush(struct dm_dirty_log *log)
if (!lc->touched)
return 0;
r = write_header(lc);
r = rw_header(lc, WRITE);
if (r)
fail_log_device(lc);
else

View file

@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
queue_work(kmultipathd, &m->trigger_event);
schedule_work(&m->trigger_event);
queue_work(kmultipathd, &pgpath->deactivate_path);
out:
@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
queue_work(kmultipathd, &m->trigger_event);
schedule_work(&m->trigger_event);
out:
spin_unlock_irqrestore(&m->lock, flags);
@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
spin_unlock_irqrestore(&m->lock, flags);
queue_work(kmultipathd, &m->trigger_event);
schedule_work(&m->trigger_event);
}
/*
@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
}
spin_unlock_irqrestore(&m->lock, flags);
queue_work(kmultipathd, &m->trigger_event);
schedule_work(&m->trigger_event);
return 0;
}
@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
static void __exit dm_multipath_exit(void)
{
int r;
destroy_workqueue(kmpath_handlerd);
destroy_workqueue(kmultipathd);
r = dm_unregister_target(&multipath_target);
if (r < 0)
DMERR("target unregister failed %d", r);
dm_unregister_target(&multipath_target);
kmem_cache_destroy(_mpio_cache);
}

View file

@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
struct mirror_set *ms = m->ms;
struct mirror *new;
if (!errors_handled(ms))
return;
/*
* error_count is used for nothing more than a
* simple way to tell if a device has encountered
@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
if (test_and_set_bit(error_type, &m->error_type))
return;
if (!errors_handled(ms))
return;
if (m != get_default_mirror(ms))
goto out;
@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
kfree(ms);
}
static inline int _check_region_size(struct dm_target *ti, uint32_t size)
{
return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
size > ti->len);
}
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
unsigned int mirror, char **argv)
{
@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
return NULL;
}
if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
ti->error = "Invalid region size";
dm_dirty_log_destroy(dl);
return NULL;
}
return dl;
}
@ -1300,11 +1288,7 @@ static int __init dm_mirror_init(void)
static void __exit dm_mirror_exit(void)
{
int r;
r = dm_unregister_target(&mirror_target);
if (r < 0)
DMERR("unregister failed %d", r);
dm_unregister_target(&mirror_target);
}
/* Module hooks */

View file

@ -0,0 +1,704 @@
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
* Copyright (C) 2006-2008 Red Hat GmbH
*
* This file is released under the GPL.
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
/*-----------------------------------------------------------------
* Persistent snapshots, by persistent we mean that the snapshot
* will survive a reboot.
*---------------------------------------------------------------*/
/*
* We need to store a record of which parts of the origin have
* been copied to the snapshot device. The snapshot code
* requires that we copy exception chunks to chunk aligned areas
* of the COW store. It makes sense therefore, to store the
* metadata in chunk size blocks.
*
* There is no backward or forward compatibility implemented,
* snapshots with different disk versions than the kernel will
* not be usable. It is expected that "lvcreate" will blank out
* the start of a fresh COW device before calling the snapshot
* constructor.
*
* The first chunk of the COW device just contains the header.
* After this there is a chunk filled with exception metadata,
* followed by as many exception chunks as can fit in the
* metadata areas.
*
* All on disk structures are in little-endian format. The end
* of the exceptions info is indicated by an exception with a
* new_chunk of 0, which is invalid since it would point to the
* header chunk.
*/
/*
* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
*/
#define SNAP_MAGIC 0x70416e53
/*
* The on-disk version of the metadata.
*/
#define SNAPSHOT_DISK_VERSION 1
struct disk_header {
uint32_t magic;
/*
* Is this snapshot valid. There is no way of recovering
* an invalid snapshot.
*/
uint32_t valid;
/*
* Simple, incrementing version. no backward
* compatibility.
*/
uint32_t version;
/* In sectors */
uint32_t chunk_size;
};
struct disk_exception {
uint64_t old_chunk;
uint64_t new_chunk;
};
struct commit_callback {
void (*callback)(void *, int success);
void *context;
};
/*
* The top level structure for a persistent exception store.
*/
struct pstore {
struct dm_snapshot *snap; /* up pointer to my snapshot */
int version;
int valid;
uint32_t exceptions_per_area;
/*
* Now that we have an asynchronous kcopyd there is no
* need for large chunk sizes, so it wont hurt to have a
* whole chunks worth of metadata in memory at once.
*/
void *area;
/*
* An area of zeros used to clear the next area.
*/
void *zero_area;
/*
* Used to keep track of which metadata area the data in
* 'chunk' refers to.
*/
chunk_t current_area;
/*
* The next free chunk for an exception.
*/
chunk_t next_free;
/*
* The index of next free exception in the current
* metadata area.
*/
uint32_t current_committed;
atomic_t pending_count;
uint32_t callback_count;
struct commit_callback *callbacks;
struct dm_io_client *io_client;
struct workqueue_struct *metadata_wq;
};
static unsigned sectors_to_pages(unsigned sectors)
{
return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
}
static int alloc_area(struct pstore *ps)
{
int r = -ENOMEM;
size_t len;
len = ps->snap->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
* a single metadata area.
*/
ps->area = vmalloc(len);
if (!ps->area)
return r;
ps->zero_area = vmalloc(len);
if (!ps->zero_area) {
vfree(ps->area);
return r;
}
memset(ps->zero_area, 0, len);
return 0;
}
static void free_area(struct pstore *ps)
{
vfree(ps->area);
ps->area = NULL;
vfree(ps->zero_area);
ps->zero_area = NULL;
}
struct mdata_req {
struct dm_io_region *where;
struct dm_io_request *io_req;
struct work_struct work;
int result;
};
static void do_metadata(struct work_struct *work)
{
struct mdata_req *req = container_of(work, struct mdata_req, work);
req->result = dm_io(req->io_req, 1, req->where, NULL);
}
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * chunk,
.count = ps->snap->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->area,
.client = ps->io_client,
.notify.fn = NULL,
};
struct mdata_req req;
if (!metadata)
return dm_io(&io_req, 1, &where, NULL);
req.where = &where;
req.io_req = &io_req;
/*
* Issue the synchronous I/O from a different thread
* to avoid generic_make_request recursion.
*/
INIT_WORK(&req.work, do_metadata);
queue_work(ps->metadata_wq, &req.work);
flush_workqueue(ps->metadata_wq);
return req.result;
}
/*
* Convert a metadata area index to a chunk index.
*/
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
return 1 + ((ps->exceptions_per_area + 1) * area);
}
/*
* Read or write a metadata area. Remembering to skip the first
* chunk which holds the header.
*/
static int area_io(struct pstore *ps, int rw)
{
int r;
chunk_t chunk;
chunk = area_location(ps, ps->current_area);
r = chunk_io(ps, chunk, rw, 0);
if (r)
return r;
return 0;
}
static void zero_memory_area(struct pstore *ps)
{
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * area_location(ps, area),
.count = ps->snap->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->zero_area,
.client = ps->io_client,
.notify.fn = NULL,
};
return dm_io(&io_req, 1, &where, NULL);
}
static int read_header(struct pstore *ps, int *new_snapshot)
{
int r;
struct disk_header *dh;
chunk_t chunk_size;
int chunk_size_supplied = 1;
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
if (!ps->snap->chunk_size) {
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
chunk_size_supplied = 0;
}
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
r = alloc_area(ps);
if (r)
return r;
r = chunk_io(ps, 0, READ, 1);
if (r)
goto bad;
dh = (struct disk_header *) ps->area;
if (le32_to_cpu(dh->magic) == 0) {
*new_snapshot = 1;
return 0;
}
if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
DMWARN("Invalid or corrupt snapshot");
r = -ENXIO;
goto bad;
}
*new_snapshot = 0;
ps->valid = le32_to_cpu(dh->valid);
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->snap->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
ps->snap->chunk_size = chunk_size;
ps->snap->chunk_mask = chunk_size - 1;
ps->snap->chunk_shift = ffs(chunk_size) - 1;
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
ps->io_client);
if (r)
return r;
r = alloc_area(ps);
return r;
bad:
free_area(ps);
return r;
}
static int write_header(struct pstore *ps)
{
struct disk_header *dh;
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
/*
* Access functions for the disk exceptions, these do the endian conversions.
*/
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
{
BUG_ON(index >= ps->exceptions_per_area);
return ((struct disk_exception *) ps->area) + index;
}
static void read_exception(struct pstore *ps,
uint32_t index, struct disk_exception *result)
{
struct disk_exception *e = get_exception(ps, index);
/* copy it */
result->old_chunk = le64_to_cpu(e->old_chunk);
result->new_chunk = le64_to_cpu(e->new_chunk);
}
static void write_exception(struct pstore *ps,
uint32_t index, struct disk_exception *de)
{
struct disk_exception *e = get_exception(ps, index);
/* copy it */
e->old_chunk = cpu_to_le64(de->old_chunk);
e->new_chunk = cpu_to_le64(de->new_chunk);
}
/*
* Registers the exceptions that are present in the current area.
* 'full' is filled in to indicate if the area has been
* filled.
*/
static int insert_exceptions(struct pstore *ps,
int (*callback)(void *callback_context,
chunk_t old, chunk_t new),
void *callback_context,
int *full)
{
int r;
unsigned int i;
struct disk_exception de;
/* presume the area is full */
*full = 1;
for (i = 0; i < ps->exceptions_per_area; i++) {
read_exception(ps, i, &de);
/*
* If the new_chunk is pointing at the start of
* the COW device, where the first metadata area
* is we know that we've hit the end of the
* exceptions. Therefore the area is not full.
*/
if (de.new_chunk == 0LL) {
ps->current_committed = i;
*full = 0;
break;
}
/*
* Keep track of the start of the free chunks.
*/
if (ps->next_free <= de.new_chunk)
ps->next_free = de.new_chunk + 1;
/*
* Otherwise we add the exception to the snapshot.
*/
r = callback(callback_context, de.old_chunk, de.new_chunk);
if (r)
return r;
}
return 0;
}
static int read_exceptions(struct pstore *ps,
int (*callback)(void *callback_context, chunk_t old,
chunk_t new),
void *callback_context)
{
int r, full = 1;
/*
* Keeping reading chunks and inserting exceptions until
* we find a partially full area.
*/
for (ps->current_area = 0; full; ps->current_area++) {
r = area_io(ps, READ);
if (r)
return r;
r = insert_exceptions(ps, callback, callback_context, &full);
if (r)
return r;
}
ps->current_area--;
return 0;
}
static struct pstore *get_info(struct dm_exception_store *store)
{
return (struct pstore *) store->context;
}
static void persistent_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = get_info(store)->next_free * store->snap->chunk_size;
*denominator = get_dev_size(store->snap->cow->bdev);
}
static void persistent_destroy(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
dm_io_client_destroy(ps->io_client);
vfree(ps->callbacks);
free_area(ps);
kfree(ps);
}
static int persistent_read_metadata(struct dm_exception_store *store,
int (*callback)(void *callback_context,
chunk_t old, chunk_t new),
void *callback_context)
{
int r, uninitialized_var(new_snapshot);
struct pstore *ps = get_info(store);
/*
* Read the snapshot header.
*/
r = read_header(ps, &new_snapshot);
if (r)
return r;
/*
* Now we know correct chunk_size, complete the initialisation.
*/
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
if (!ps->callbacks)
return -ENOMEM;
/*
* Do we need to setup a new snapshot ?
*/
if (new_snapshot) {
r = write_header(ps);
if (r) {
DMWARN("write_header failed");
return r;
}
ps->current_area = 0;
zero_memory_area(ps);
r = zero_disk_area(ps, 0);
if (r) {
DMWARN("zero_disk_area(0) failed");
return r;
}
} else {
/*
* Sanity checks.
*/
if (ps->version != SNAPSHOT_DISK_VERSION) {
DMWARN("unable to handle snapshot disk version %d",
ps->version);
return -EINVAL;
}
/*
* Metadata are valid, but snapshot is invalidated
*/
if (!ps->valid)
return 1;
/*
* Read the metadata.
*/
r = read_exceptions(ps, callback, callback_context);
if (r)
return r;
}
return 0;
}
static int persistent_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
sector_t size = get_dev_size(store->snap->cow->bdev);
/* Is there enough room ? */
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
/*
* Move onto the next free pending, making sure to take
* into account the location of the metadata chunks.
*/
stride = (ps->exceptions_per_area + 1);
next_free = ++ps->next_free;
if (sector_div(next_free, stride) == 1)
ps->next_free++;
atomic_inc(&ps->pending_count);
return 0;
}
static void persistent_commit_exception(struct dm_exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context)
{
unsigned int i;
struct pstore *ps = get_info(store);
struct disk_exception de;
struct commit_callback *cb;
de.old_chunk = e->old_chunk;
de.new_chunk = e->new_chunk;
write_exception(ps, ps->current_committed++, &de);
/*
* Add the callback to the back of the array. This code
* is the only place where the callback array is
* manipulated, and we know that it will never be called
* multiple times concurrently.
*/
cb = ps->callbacks + ps->callback_count++;
cb->callback = callback;
cb->context = callback_context;
/*
* If there are exceptions in flight and we have not yet
* filled this metadata area there's nothing more to do.
*/
if (!atomic_dec_and_test(&ps->pending_count) &&
(ps->current_committed != ps->exceptions_per_area))
return;
/*
* If we completely filled the current area, then wipe the next one.
*/
if ((ps->current_committed == ps->exceptions_per_area) &&
zero_disk_area(ps, ps->current_area + 1))
ps->valid = 0;
/*
* Commit exceptions to disk.
*/
if (ps->valid && area_io(ps, WRITE))
ps->valid = 0;
/*
* Advance to the next area if this one is full.
*/
if (ps->current_committed == ps->exceptions_per_area) {
ps->current_committed = 0;
ps->current_area++;
zero_memory_area(ps);
}
for (i = 0; i < ps->callback_count; i++) {
cb = ps->callbacks + i;
cb->callback(cb->context, ps->valid);
}
ps->callback_count = 0;
}
static void persistent_drop_snapshot(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
ps->valid = 0;
if (write_header(ps))
DMWARN("write header failed");
}
int dm_create_persistent(struct dm_exception_store *store)
{
struct pstore *ps;
/* allocate the pstore */
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
ps->snap = store->snap;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
ps->next_free = 2; /* skipping the header and first area */
ps->current_committed = 0;
ps->callback_count = 0;
atomic_set(&ps->pending_count, 0);
ps->callbacks = NULL;
ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
if (!ps->metadata_wq) {
kfree(ps);
DMERR("couldn't start header metadata update thread");
return -ENOMEM;
}
store->destroy = persistent_destroy;
store->read_metadata = persistent_read_metadata;
store->prepare_exception = persistent_prepare_exception;
store->commit_exception = persistent_commit_exception;
store->drop_snapshot = persistent_drop_snapshot;
store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
int dm_persistent_snapshot_init(void)
{
return 0;
}
void dm_persistent_snapshot_exit(void)
{
}

View file

@ -0,0 +1,98 @@
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
* Copyright (C) 2006-2008 Red Hat GmbH
*
* This file is released under the GPL.
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#define DM_MSG_PREFIX "transient snapshot"
/*-----------------------------------------------------------------
* Implementation of the store for non-persistent snapshots.
*---------------------------------------------------------------*/
struct transient_c {
sector_t next_free;
};
static void transient_destroy(struct dm_exception_store *store)
{
kfree(store->context);
}
static int transient_read_metadata(struct dm_exception_store *store,
int (*callback)(void *callback_context,
chunk_t old, chunk_t new),
void *callback_context)
{
return 0;
}
static int transient_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
struct transient_c *tc = (struct transient_c *) store->context;
sector_t size = get_dev_size(store->snap->cow->bdev);
if (size < (tc->next_free + store->snap->chunk_size))
return -1;
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
tc->next_free += store->snap->chunk_size;
return 0;
}
static void transient_commit_exception(struct dm_exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context)
{
/* Just succeed */
callback(callback_context, 1);
}
static void transient_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
*denominator = get_dev_size(store->snap->cow->bdev);
}
int dm_create_transient(struct dm_exception_store *store)
{
struct transient_c *tc;
store->destroy = transient_destroy;
store->read_metadata = transient_read_metadata;
store->prepare_exception = transient_prepare_exception;
store->commit_exception = transient_commit_exception;
store->drop_snapshot = NULL;
store->fraction_full = transient_fraction_full;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
tc->next_free = 0;
store->context = tc;
return 0;
}
int dm_transient_snapshot_init(void)
{
return 0;
}
void dm_transient_snapshot_exit(void)
{
}

View file

@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
@ -20,6 +21,7 @@
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
#include "dm-exception-store.h"
#include "dm-snap.h"
#include "dm-bio-list.h"
@ -428,8 +430,13 @@ static void insert_completed_exception(struct dm_snapshot *s,
list_add(&new_e->hash_list, e ? &e->hash_list : l);
}
int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
/*
* Callback used by the exception stores to load exceptions when
* initialising.
*/
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
struct dm_snapshot *s = context;
struct dm_snap_exception *e;
e = alloc_exception();
@ -658,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
/* Metadata must only be loaded into one table at once */
r = s->store.read_metadata(&s->store);
r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
goto bad_load_and_register;
@ -735,7 +742,7 @@ static void snapshot_dtr(struct dm_target *ti)
unregister_snapshot(s);
while (atomic_read(&s->pending_exceptions_count))
yield();
msleep(1);
/*
* Ensure instructions in mempool_destroy aren't reordered
* before atomic_read.
@ -888,10 +895,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
/*
* Check for conflicting reads. This is extremely improbable,
* so yield() is sufficient and there is no need for a wait queue.
* so msleep(1) is sufficient and there is no need for a wait queue.
*/
while (__chunk_is_tracked(s, pe->e.old_chunk))
yield();
msleep(1);
/*
* Add a proper exception, and remove the
@ -1404,6 +1411,12 @@ static int __init dm_snapshot_init(void)
{
int r;
r = dm_exception_store_init();
if (r) {
DMERR("Failed to initialize exception stores");
return r;
}
r = dm_register_target(&snapshot_target);
if (r) {
DMERR("snapshot target register failed %d", r);
@ -1452,39 +1465,34 @@ static int __init dm_snapshot_init(void)
return 0;
bad_pending_pool:
bad_pending_pool:
kmem_cache_destroy(tracked_chunk_cache);
bad5:
bad5:
kmem_cache_destroy(pending_cache);
bad4:
bad4:
kmem_cache_destroy(exception_cache);
bad3:
bad3:
exit_origin_hash();
bad2:
bad2:
dm_unregister_target(&origin_target);
bad1:
bad1:
dm_unregister_target(&snapshot_target);
return r;
}
static void __exit dm_snapshot_exit(void)
{
int r;
destroy_workqueue(ksnapd);
r = dm_unregister_target(&snapshot_target);
if (r)
DMERR("snapshot unregister failed %d", r);
r = dm_unregister_target(&origin_target);
if (r)
DMERR("origin unregister failed %d", r);
dm_unregister_target(&snapshot_target);
dm_unregister_target(&origin_target);
exit_origin_hash();
kmem_cache_destroy(pending_cache);
kmem_cache_destroy(exception_cache);
kmem_cache_destroy(tracked_chunk_cache);
dm_exception_store_exit();
}
/* Module hooks */

View file

@ -1,6 +1,4 @@
/*
* dm-snapshot.c
*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
@ -10,6 +8,7 @@
#define DM_SNAPSHOT_H
#include <linux/device-mapper.h>
#include "dm-exception-store.h"
#include "dm-bio-list.h"
#include <linux/blkdev.h>
#include <linux/workqueue.h>
@ -20,116 +19,6 @@ struct exception_table {
struct list_head *table;
};
/*
* The snapshot code deals with largish chunks of the disk at a
* time. Typically 32k - 512k.
*/
typedef sector_t chunk_t;
/*
* An exception is used where an old chunk of data has been
* replaced by a new one.
* If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
* of chunks that follow contiguously. Remaining bits hold the number of the
* chunk within the device.
*/
struct dm_snap_exception {
struct list_head hash_list;
chunk_t old_chunk;
chunk_t new_chunk;
};
/*
* Funtions to manipulate consecutive chunks
*/
# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
# define DM_CHUNK_CONSECUTIVE_BITS 8
# define DM_CHUNK_NUMBER_BITS 56
static inline chunk_t dm_chunk_number(chunk_t chunk)
{
return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
}
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
{
return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
}
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
{
e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
BUG_ON(!dm_consecutive_chunk_count(e));
}
# else
# define DM_CHUNK_CONSECUTIVE_BITS 0
static inline chunk_t dm_chunk_number(chunk_t chunk)
{
return chunk;
}
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
{
return 0;
}
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
{
}
# endif
/*
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
struct exception_store {
/*
* Destroys this object when you've finished with it.
*/
void (*destroy) (struct exception_store *store);
/*
* The target shouldn't read the COW device until this is
* called.
*/
int (*read_metadata) (struct exception_store *store);
/*
* Find somewhere to store the next exception.
*/
int (*prepare_exception) (struct exception_store *store,
struct dm_snap_exception *e);
/*
* Update the metadata with this exception.
*/
void (*commit_exception) (struct exception_store *store,
struct dm_snap_exception *e,
void (*callback) (void *, int success),
void *callback_context);
/*
* The snapshot is invalid, note this in the metadata.
*/
void (*drop_snapshot) (struct exception_store *store);
/*
* Return how full the snapshot is.
*/
void (*fraction_full) (struct exception_store *store,
sector_t *numerator,
sector_t *denominator);
struct dm_snapshot *snap;
void *context;
};
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
@ -172,7 +61,7 @@ struct dm_snapshot {
spinlock_t pe_lock;
/* The on disk metadata handler */
struct exception_store store;
struct dm_exception_store store;
struct dm_kcopyd_client *kcopyd_client;
@ -186,20 +75,6 @@ struct dm_snapshot {
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
/*
* Used by the exception stores to load exceptions hen
* initialising.
*/
int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
/*
* Constructor and destructor for the default persistent
* store.
*/
int dm_create_persistent(struct exception_store *store);
int dm_create_transient(struct exception_store *store);
/*
* Return the number of sectors in the device.
*/

View file

@ -337,9 +337,7 @@ int __init dm_stripe_init(void)
void dm_stripe_exit(void)
{
if (dm_unregister_target(&stripe_target))
DMWARN("target unregistration failed");
dm_unregister_target(&stripe_target);
destroy_workqueue(kstriped);
return;

99
drivers/md/dm-sysfs.c Normal file
View file

@ -0,0 +1,99 @@
/*
* Copyright (C) 2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include <linux/sysfs.h>
#include <linux/dm-ioctl.h>
#include "dm.h"
struct dm_sysfs_attr {
struct attribute attr;
ssize_t (*show)(struct mapped_device *, char *);
ssize_t (*store)(struct mapped_device *, char *);
};
#define DM_ATTR_RO(_name) \
struct dm_sysfs_attr dm_attr_##_name = \
__ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL)
static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr,
char *page)
{
struct dm_sysfs_attr *dm_attr;
struct mapped_device *md;
ssize_t ret;
dm_attr = container_of(attr, struct dm_sysfs_attr, attr);
if (!dm_attr->show)
return -EIO;
md = dm_get_from_kobject(kobj);
if (!md)
return -EINVAL;
ret = dm_attr->show(md, page);
dm_put(md);
return ret;
}
static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf)
{
if (dm_copy_name_and_uuid(md, buf, NULL))
return -EIO;
strcat(buf, "\n");
return strlen(buf);
}
static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
{
if (dm_copy_name_and_uuid(md, NULL, buf))
return -EIO;
strcat(buf, "\n");
return strlen(buf);
}
static DM_ATTR_RO(name);
static DM_ATTR_RO(uuid);
static struct attribute *dm_attrs[] = {
&dm_attr_name.attr,
&dm_attr_uuid.attr,
NULL,
};
static struct sysfs_ops dm_sysfs_ops = {
.show = dm_attr_show,
};
/*
* dm kobject is embedded in mapped_device structure
* no need to define release function here
*/
static struct kobj_type dm_ktype = {
.sysfs_ops = &dm_sysfs_ops,
.default_attrs = dm_attrs,
};
/*
* Initialize kobj
* because nobody using md yet, no need to call explicit dm_get/put
*/
int dm_sysfs_init(struct mapped_device *md)
{
return kobject_init_and_add(dm_kobject(md), &dm_ktype,
&disk_to_dev(dm_disk(md))->kobj,
"%s", "dm");
}
/*
* Remove kobj, called after all references removed
*/
void dm_sysfs_exit(struct mapped_device *md)
{
kobject_put(dm_kobject(md));
}

View file

@ -1,6 +1,6 @@
/*
* Copyright (C) 2001 Sistina Software (UK) Limited.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#define DM_MSG_PREFIX "table"
@ -24,6 +25,19 @@
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
/*
* The table has always exactly one reference from either mapped_device->map
* or hash_cell->new_map. This reference is not counted in table->holders.
* A pair of dm_create_table/dm_destroy_table functions is used for table
* creation/destruction.
*
* Temporary references from the other code increase table->holders. A pair
* of dm_table_get/dm_table_put functions is used to manipulate it.
*
* When the table is about to be destroyed, we wait for table->holders to
* drop to zero.
*/
struct dm_table {
struct mapped_device *md;
atomic_t holders;
@ -38,6 +52,8 @@ struct dm_table {
sector_t *highs;
struct dm_target *targets;
unsigned barriers_supported:1;
/*
* Indicates the rw permissions for the new logical
* device. This should be a combination of FMODE_READ
@ -226,7 +242,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
return -ENOMEM;
INIT_LIST_HEAD(&t->devices);
atomic_set(&t->holders, 1);
atomic_set(&t->holders, 0);
t->barriers_supported = 1;
if (!num_targets)
num_targets = KEYS_PER_NODE;
@ -256,10 +273,14 @@ static void free_devices(struct list_head *devices)
}
}
static void table_destroy(struct dm_table *t)
void dm_table_destroy(struct dm_table *t)
{
unsigned int i;
while (atomic_read(&t->holders))
msleep(1);
smp_mb();
/* free the indexes (see dm_table_complete) */
if (t->depth >= 2)
vfree(t->index[t->depth - 2]);
@ -297,8 +318,8 @@ void dm_table_put(struct dm_table *t)
if (!t)
return;
if (atomic_dec_and_test(&t->holders))
table_destroy(t);
smp_mb__before_atomic_dec();
atomic_dec(&t->holders);
}
/*
@ -728,6 +749,10 @@ int dm_table_add_target(struct dm_table *t, const char *type,
/* FIXME: the plan is to combine high here and then have
* the merge fn apply the target level restrictions. */
combine_restrictions_low(&t->limits, &tgt->limits);
if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS))
t->barriers_supported = 0;
return 0;
bad:
@ -772,6 +797,12 @@ int dm_table_complete(struct dm_table *t)
check_for_valid_limits(&t->limits);
/*
* We only support barriers if there is exactly one underlying device.
*/
if (!list_is_singular(&t->devices))
t->barriers_supported = 0;
/* how many indexes will the btree have ? */
leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@ -986,6 +1017,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
return t->md;
}
int dm_table_barrier_ok(struct dm_table *t)
{
return t->barriers_supported;
}
EXPORT_SYMBOL(dm_table_barrier_ok);
EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);

View file

@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t)
return rv;
}
int dm_unregister_target(struct target_type *t)
void dm_unregister_target(struct target_type *t)
{
struct tt_internal *ti;
down_write(&_lock);
if (!(ti = __find_target_type(t->name))) {
up_write(&_lock);
return -EINVAL;
DMCRIT("Unregistering unrecognised target: %s", t->name);
BUG();
}
if (ti->use) {
up_write(&_lock);
return -ETXTBSY;
DMCRIT("Attempt to unregister target still in use: %s",
t->name);
BUG();
}
list_del(&ti->list);
kfree(ti);
up_write(&_lock);
return 0;
}
/*
@ -187,8 +187,7 @@ int __init dm_target_init(void)
void dm_target_exit(void)
{
if (dm_unregister_target(&error_target))
DMWARN("error target unregistration failed");
dm_unregister_target(&error_target);
}
EXPORT_SYMBOL(dm_register_target);

View file

@ -69,10 +69,7 @@ static int __init dm_zero_init(void)
static void __exit dm_zero_exit(void)
{
int r = dm_unregister_target(&zero_target);
if (r < 0)
DMERR("unregister failed %d", r);
dm_unregister_target(&zero_target);
}
module_init(dm_zero_init)

View file

@ -1,6 +1,6 @@
/*
* Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
@ -32,6 +32,7 @@ static unsigned int _major = 0;
static DEFINE_SPINLOCK(_minor_lock);
/*
* For bio-based dm.
* One of these is allocated per bio.
*/
struct dm_io {
@ -43,6 +44,7 @@ struct dm_io {
};
/*
* For bio-based dm.
* One of these is allocated per target within a bio. Hopefully
* this will be simplified out one day.
*/
@ -54,6 +56,27 @@ struct dm_target_io {
DEFINE_TRACE(block_bio_complete);
/*
* For request-based dm.
* One of these is allocated per request.
*/
struct dm_rq_target_io {
struct mapped_device *md;
struct dm_target *ti;
struct request *orig, clone;
int error;
union map_info info;
};
/*
* For request-based dm.
* One of these is allocated per bio.
*/
struct dm_rq_clone_bio_info {
struct bio *orig;
struct request *rq;
};
union map_info *dm_get_mapinfo(struct bio *bio)
{
if (bio && bio->bi_private)
@ -144,11 +167,16 @@ struct mapped_device {
/* forced geometry settings */
struct hd_geometry geometry;
/* sysfs handle */
struct kobject kobj;
};
#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_bio_info_cache;
static int __init local_init(void)
{
@ -164,9 +192,17 @@ static int __init local_init(void)
if (!_tio_cache)
goto out_free_io_cache;
_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
if (!_rq_tio_cache)
goto out_free_tio_cache;
_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
if (!_rq_bio_info_cache)
goto out_free_rq_tio_cache;
r = dm_uevent_init();
if (r)
goto out_free_tio_cache;
goto out_free_rq_bio_info_cache;
_major = major;
r = register_blkdev(_major, _name);
@ -180,6 +216,10 @@ static int __init local_init(void)
out_uevent_exit:
dm_uevent_exit();
out_free_rq_bio_info_cache:
kmem_cache_destroy(_rq_bio_info_cache);
out_free_rq_tio_cache:
kmem_cache_destroy(_rq_tio_cache);
out_free_tio_cache:
kmem_cache_destroy(_tio_cache);
out_free_io_cache:
@ -190,6 +230,8 @@ static int __init local_init(void)
static void local_exit(void)
{
kmem_cache_destroy(_rq_bio_info_cache);
kmem_cache_destroy(_rq_tio_cache);
kmem_cache_destroy(_tio_cache);
kmem_cache_destroy(_io_cache);
unregister_blkdev(_major, _name);
@ -796,7 +838,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
ci.map = dm_get_table(md);
if (unlikely(!ci.map))
return -EIO;
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
dm_table_put(ci.map);
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
ci.md = md;
ci.bio = bio;
ci.io = alloc_io(md);
@ -880,15 +926,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
struct mapped_device *md = q->queuedata;
int cpu;
/*
* There is no use in forwarding any barrier request since we can't
* guarantee it is (or can be) handled by the targets correctly.
*/
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
down_read(&md->io_lock);
cpu = part_stat_lock();
@ -943,8 +980,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
struct mapped_device *md = congested_data;
struct dm_table *map;
atomic_inc(&md->pending);
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
map = dm_get_table(md);
if (map) {
@ -953,10 +988,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
}
}
if (!atomic_dec_return(&md->pending))
/* nudge anyone waiting on suspend queue */
wake_up(&md->wait);
return r;
}
@ -1216,10 +1247,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
if (md->suspended_bdev)
__set_size(md, size);
if (size == 0)
return 0;
dm_table_get(t);
if (!size) {
dm_table_destroy(t);
return 0;
}
dm_table_event_callback(t, event_callback, md);
write_lock(&md->map_lock);
@ -1241,7 +1274,7 @@ static void __unbind(struct mapped_device *md)
write_lock(&md->map_lock);
md->map = NULL;
write_unlock(&md->map_lock);
dm_table_put(map);
dm_table_destroy(map);
}
/*
@ -1255,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result)
if (!md)
return -ENXIO;
dm_sysfs_init(md);
*result = md;
return 0;
}
@ -1330,8 +1365,9 @@ void dm_put(struct mapped_device *md)
dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map);
}
__unbind(md);
dm_sysfs_exit(md);
dm_table_put(map);
__unbind(md);
free_dev(md);
}
}
@ -1669,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md)
return md->disk;
}
struct kobject *dm_kobject(struct mapped_device *md)
{
return &md->kobj;
}
/*
* struct mapped_device should not be exported outside of dm.c
* so use this check to verify that kobj is part of md structure
*/
struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
struct mapped_device *md;
md = container_of(kobj, struct mapped_device, kobj);
if (&md->kobj != kobj)
return NULL;
dm_get(md);
return md;
}
int dm_suspended(struct mapped_device *md)
{
return test_bit(DMF_SUSPENDED, &md->flags);

View file

@ -36,6 +36,7 @@ struct dm_table;
/*-----------------------------------------------------------------
* Internal table functions.
*---------------------------------------------------------------*/
void dm_table_destroy(struct dm_table *t);
void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
@ -51,6 +52,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
* To check the return value from dm_table_find_target().
*/
#define dm_target_is_valid(t) ((t)->table)
int dm_table_barrier_ok(struct dm_table *t);
/*-----------------------------------------------------------------
* A registry of target types.
@ -71,6 +73,14 @@ int dm_split_args(int *argc, char ***argvp, char *input);
int dm_interface_init(void);
void dm_interface_exit(void);
/*
* sysfs interface
*/
int dm_sysfs_init(struct mapped_device *md);
void dm_sysfs_exit(struct mapped_device *md);
struct kobject *dm_kobject(struct mapped_device *md);
struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
/*
* Targets for linear and striped mappings
*/

View file

@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
*/
typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
union map_info *map_context);
typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
union map_info *map_context);
/*
* Returns:
@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
typedef int (*dm_endio_fn) (struct dm_target *ti,
struct bio *bio, int error,
union map_info *map_context);
typedef int (*dm_request_endio_fn) (struct dm_target *ti,
struct request *clone, int error,
union map_info *map_context);
typedef void (*dm_flush_fn) (struct dm_target *ti);
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size);
/*
* Returns:
* 0: The target can handle the next I/O immediately.
* 1: The target can't handle the next I/O immediately.
*/
typedef int (*dm_busy_fn) (struct dm_target *ti);
void dm_error(const char *message);
/*
@ -100,14 +112,23 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
/*
* Information about a target type
*/
/*
* Target features
*/
#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001
struct target_type {
uint64_t features;
const char *name;
struct module *module;
unsigned version[3];
dm_ctr_fn ctr;
dm_dtr_fn dtr;
dm_map_fn map;
dm_map_request_fn map_rq;
dm_endio_fn end_io;
dm_request_endio_fn rq_end_io;
dm_flush_fn flush;
dm_presuspend_fn presuspend;
dm_postsuspend_fn postsuspend;
@ -117,6 +138,7 @@ struct target_type {
dm_message_fn message;
dm_ioctl_fn ioctl;
dm_merge_fn merge;
dm_busy_fn busy;
};
struct io_restrictions {
@ -157,8 +179,7 @@ struct dm_target {
};
int dm_register_target(struct target_type *t);
int dm_unregister_target(struct target_type *t);
void dm_unregister_target(struct target_type *t);
/*-----------------------------------------------------------------
* Functions for creating and manipulating mapped devices.
@ -276,6 +297,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
*---------------------------------------------------------------*/
#define DM_NAME "device-mapper"
#define DMCRIT(f, arg...) \
printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
#define DMERR(f, arg...) \
printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
#define DMERR_LIMIT(f, arg...) \