Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: dm snapshot: extend exception store functions dm snapshot: split out exception store implementations dm snapshot: rename struct exception_store dm snapshot: separate out exception store interface dm mpath: move trigger_event to system workqueue dm: add name and uuid to sysfs dm table: rework reference counting dm: support barriers on simple devices dm request: extend target interface dm request: add caches dm ioctl: allow dm_copy_name_and_uuid to return only one field dm log: ensure log bitmap fits on log device dm log: move region_size validation dm log: avoid reinitialising io_req on every operation dm: consolidate target deregistration error handling dm raid1: fix error count dm log: fix dm_io_client leak on error paths dm snapshot: change yield to msleep dm table: drop reference at unbind
This commit is contained in:
commit
238c6d5483
22 changed files with 1321 additions and 985 deletions
|
@ -3,9 +3,10 @@
|
|||
#
|
||||
|
||||
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
|
||||
dm-ioctl.o dm-io.o dm-kcopyd.o
|
||||
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
|
||||
dm-multipath-objs := dm-path-selector.o dm-mpath.o
|
||||
dm-snapshot-objs := dm-snap.o dm-exception-store.o
|
||||
dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
|
||||
dm-snap-persistent.o
|
||||
dm-mirror-objs := dm-raid1.o
|
||||
md-mod-objs := md.o bitmap.o
|
||||
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
|
||||
|
|
|
@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
|
|||
|
||||
static void __exit dm_crypt_exit(void)
|
||||
{
|
||||
int r = dm_unregister_target(&crypt_target);
|
||||
|
||||
if (r < 0)
|
||||
DMERR("unregister failed %d", r);
|
||||
|
||||
dm_unregister_target(&crypt_target);
|
||||
kmem_cache_destroy(_crypt_io_pool);
|
||||
}
|
||||
|
||||
|
|
|
@ -364,11 +364,7 @@ static int __init dm_delay_init(void)
|
|||
|
||||
static void __exit dm_delay_exit(void)
|
||||
{
|
||||
int r = dm_unregister_target(&delay_target);
|
||||
|
||||
if (r < 0)
|
||||
DMERR("unregister failed %d", r);
|
||||
|
||||
dm_unregister_target(&delay_target);
|
||||
kmem_cache_destroy(delayed_cache);
|
||||
destroy_workqueue(kdelayd_wq);
|
||||
}
|
||||
|
|
|
@ -1,756 +1,45 @@
|
|||
/*
|
||||
* dm-exception-store.c
|
||||
*
|
||||
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2006 Red Hat GmbH
|
||||
* Copyright (C) 2006-2008 Red Hat GmbH
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-snap.h"
|
||||
#include "dm-exception-store.h"
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dm-io.h>
|
||||
#include <linux/dm-kcopyd.h>
|
||||
|
||||
#define DM_MSG_PREFIX "snapshots"
|
||||
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
|
||||
#define DM_MSG_PREFIX "snapshot exception stores"
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Persistent snapshots, by persistent we mean that the snapshot
|
||||
* will survive a reboot.
|
||||
*---------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* We need to store a record of which parts of the origin have
|
||||
* been copied to the snapshot device. The snapshot code
|
||||
* requires that we copy exception chunks to chunk aligned areas
|
||||
* of the COW store. It makes sense therefore, to store the
|
||||
* metadata in chunk size blocks.
|
||||
*
|
||||
* There is no backward or forward compatibility implemented,
|
||||
* snapshots with different disk versions than the kernel will
|
||||
* not be usable. It is expected that "lvcreate" will blank out
|
||||
* the start of a fresh COW device before calling the snapshot
|
||||
* constructor.
|
||||
*
|
||||
* The first chunk of the COW device just contains the header.
|
||||
* After this there is a chunk filled with exception metadata,
|
||||
* followed by as many exception chunks as can fit in the
|
||||
* metadata areas.
|
||||
*
|
||||
* All on disk structures are in little-endian format. The end
|
||||
* of the exceptions info is indicated by an exception with a
|
||||
* new_chunk of 0, which is invalid since it would point to the
|
||||
* header chunk.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
|
||||
*/
|
||||
#define SNAP_MAGIC 0x70416e53
|
||||
|
||||
/*
|
||||
* The on-disk version of the metadata.
|
||||
*/
|
||||
#define SNAPSHOT_DISK_VERSION 1
|
||||
|
||||
struct disk_header {
|
||||
uint32_t magic;
|
||||
|
||||
/*
|
||||
* Is this snapshot valid. There is no way of recovering
|
||||
* an invalid snapshot.
|
||||
*/
|
||||
uint32_t valid;
|
||||
|
||||
/*
|
||||
* Simple, incrementing version. no backward
|
||||
* compatibility.
|
||||
*/
|
||||
uint32_t version;
|
||||
|
||||
/* In sectors */
|
||||
uint32_t chunk_size;
|
||||
};
|
||||
|
||||
struct disk_exception {
|
||||
uint64_t old_chunk;
|
||||
uint64_t new_chunk;
|
||||
};
|
||||
|
||||
struct commit_callback {
|
||||
void (*callback)(void *, int success);
|
||||
void *context;
|
||||
};
|
||||
|
||||
/*
|
||||
* The top level structure for a persistent exception store.
|
||||
*/
|
||||
struct pstore {
|
||||
struct dm_snapshot *snap; /* up pointer to my snapshot */
|
||||
int version;
|
||||
int valid;
|
||||
uint32_t exceptions_per_area;
|
||||
|
||||
/*
|
||||
* Now that we have an asynchronous kcopyd there is no
|
||||
* need for large chunk sizes, so it wont hurt to have a
|
||||
* whole chunks worth of metadata in memory at once.
|
||||
*/
|
||||
void *area;
|
||||
|
||||
/*
|
||||
* An area of zeros used to clear the next area.
|
||||
*/
|
||||
void *zero_area;
|
||||
|
||||
/*
|
||||
* Used to keep track of which metadata area the data in
|
||||
* 'chunk' refers to.
|
||||
*/
|
||||
chunk_t current_area;
|
||||
|
||||
/*
|
||||
* The next free chunk for an exception.
|
||||
*/
|
||||
chunk_t next_free;
|
||||
|
||||
/*
|
||||
* The index of next free exception in the current
|
||||
* metadata area.
|
||||
*/
|
||||
uint32_t current_committed;
|
||||
|
||||
atomic_t pending_count;
|
||||
uint32_t callback_count;
|
||||
struct commit_callback *callbacks;
|
||||
struct dm_io_client *io_client;
|
||||
|
||||
struct workqueue_struct *metadata_wq;
|
||||
};
|
||||
|
||||
static unsigned sectors_to_pages(unsigned sectors)
|
||||
{
|
||||
return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
|
||||
}
|
||||
|
||||
static int alloc_area(struct pstore *ps)
|
||||
{
|
||||
int r = -ENOMEM;
|
||||
size_t len;
|
||||
|
||||
len = ps->snap->chunk_size << SECTOR_SHIFT;
|
||||
|
||||
/*
|
||||
* Allocate the chunk_size block of memory that will hold
|
||||
* a single metadata area.
|
||||
*/
|
||||
ps->area = vmalloc(len);
|
||||
if (!ps->area)
|
||||
return r;
|
||||
|
||||
ps->zero_area = vmalloc(len);
|
||||
if (!ps->zero_area) {
|
||||
vfree(ps->area);
|
||||
return r;
|
||||
}
|
||||
memset(ps->zero_area, 0, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_area(struct pstore *ps)
|
||||
{
|
||||
vfree(ps->area);
|
||||
ps->area = NULL;
|
||||
vfree(ps->zero_area);
|
||||
ps->zero_area = NULL;
|
||||
}
|
||||
|
||||
struct mdata_req {
|
||||
struct dm_io_region *where;
|
||||
struct dm_io_request *io_req;
|
||||
struct work_struct work;
|
||||
int result;
|
||||
};
|
||||
|
||||
static void do_metadata(struct work_struct *work)
|
||||
{
|
||||
struct mdata_req *req = container_of(work, struct mdata_req, work);
|
||||
|
||||
req->result = dm_io(req->io_req, 1, req->where, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read or write a chunk aligned and sized block of data from a device.
|
||||
*/
|
||||
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
|
||||
{
|
||||
struct dm_io_region where = {
|
||||
.bdev = ps->snap->cow->bdev,
|
||||
.sector = ps->snap->chunk_size * chunk,
|
||||
.count = ps->snap->chunk_size,
|
||||
};
|
||||
struct dm_io_request io_req = {
|
||||
.bi_rw = rw,
|
||||
.mem.type = DM_IO_VMA,
|
||||
.mem.ptr.vma = ps->area,
|
||||
.client = ps->io_client,
|
||||
.notify.fn = NULL,
|
||||
};
|
||||
struct mdata_req req;
|
||||
|
||||
if (!metadata)
|
||||
return dm_io(&io_req, 1, &where, NULL);
|
||||
|
||||
req.where = &where;
|
||||
req.io_req = &io_req;
|
||||
|
||||
/*
|
||||
* Issue the synchronous I/O from a different thread
|
||||
* to avoid generic_make_request recursion.
|
||||
*/
|
||||
INIT_WORK(&req.work, do_metadata);
|
||||
queue_work(ps->metadata_wq, &req.work);
|
||||
flush_workqueue(ps->metadata_wq);
|
||||
|
||||
return req.result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a metadata area index to a chunk index.
|
||||
*/
|
||||
static chunk_t area_location(struct pstore *ps, chunk_t area)
|
||||
{
|
||||
return 1 + ((ps->exceptions_per_area + 1) * area);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read or write a metadata area. Remembering to skip the first
|
||||
* chunk which holds the header.
|
||||
*/
|
||||
static int area_io(struct pstore *ps, int rw)
|
||||
int dm_exception_store_init(void)
|
||||
{
|
||||
int r;
|
||||
chunk_t chunk;
|
||||
|
||||
chunk = area_location(ps, ps->current_area);
|
||||
r = dm_transient_snapshot_init();
|
||||
if (r) {
|
||||
DMERR("Unable to register transient exception store type.");
|
||||
goto transient_fail;
|
||||
}
|
||||
|
||||
r = chunk_io(ps, chunk, rw, 0);
|
||||
if (r)
|
||||
return r;
|
||||
r = dm_persistent_snapshot_init();
|
||||
if (r) {
|
||||
DMERR("Unable to register persistent exception store type");
|
||||
goto persistent_fail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void zero_memory_area(struct pstore *ps)
|
||||
{
|
||||
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static int zero_disk_area(struct pstore *ps, chunk_t area)
|
||||
{
|
||||
struct dm_io_region where = {
|
||||
.bdev = ps->snap->cow->bdev,
|
||||
.sector = ps->snap->chunk_size * area_location(ps, area),
|
||||
.count = ps->snap->chunk_size,
|
||||
};
|
||||
struct dm_io_request io_req = {
|
||||
.bi_rw = WRITE,
|
||||
.mem.type = DM_IO_VMA,
|
||||
.mem.ptr.vma = ps->zero_area,
|
||||
.client = ps->io_client,
|
||||
.notify.fn = NULL,
|
||||
};
|
||||
|
||||
return dm_io(&io_req, 1, &where, NULL);
|
||||
}
|
||||
|
||||
static int read_header(struct pstore *ps, int *new_snapshot)
|
||||
{
|
||||
int r;
|
||||
struct disk_header *dh;
|
||||
chunk_t chunk_size;
|
||||
int chunk_size_supplied = 1;
|
||||
|
||||
/*
|
||||
* Use default chunk size (or hardsect_size, if larger) if none supplied
|
||||
*/
|
||||
if (!ps->snap->chunk_size) {
|
||||
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
|
||||
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
|
||||
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
|
||||
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
|
||||
chunk_size_supplied = 0;
|
||||
}
|
||||
|
||||
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
|
||||
chunk_size));
|
||||
if (IS_ERR(ps->io_client))
|
||||
return PTR_ERR(ps->io_client);
|
||||
|
||||
r = alloc_area(ps);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = chunk_io(ps, 0, READ, 1);
|
||||
if (r)
|
||||
goto bad;
|
||||
|
||||
dh = (struct disk_header *) ps->area;
|
||||
|
||||
if (le32_to_cpu(dh->magic) == 0) {
|
||||
*new_snapshot = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
|
||||
DMWARN("Invalid or corrupt snapshot");
|
||||
r = -ENXIO;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
*new_snapshot = 0;
|
||||
ps->valid = le32_to_cpu(dh->valid);
|
||||
ps->version = le32_to_cpu(dh->version);
|
||||
chunk_size = le32_to_cpu(dh->chunk_size);
|
||||
|
||||
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
|
||||
return 0;
|
||||
|
||||
DMWARN("chunk size %llu in device metadata overrides "
|
||||
"table chunk size of %llu.",
|
||||
(unsigned long long)chunk_size,
|
||||
(unsigned long long)ps->snap->chunk_size);
|
||||
|
||||
/* We had a bogus chunk_size. Fix stuff up. */
|
||||
free_area(ps);
|
||||
|
||||
ps->snap->chunk_size = chunk_size;
|
||||
ps->snap->chunk_mask = chunk_size - 1;
|
||||
ps->snap->chunk_shift = ffs(chunk_size) - 1;
|
||||
|
||||
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
|
||||
ps->io_client);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = alloc_area(ps);
|
||||
return r;
|
||||
|
||||
bad:
|
||||
free_area(ps);
|
||||
persistent_fail:
|
||||
dm_persistent_snapshot_exit();
|
||||
transient_fail:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int write_header(struct pstore *ps)
|
||||
void dm_exception_store_exit(void)
|
||||
{
|
||||
struct disk_header *dh;
|
||||
|
||||
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
|
||||
|
||||
dh = (struct disk_header *) ps->area;
|
||||
dh->magic = cpu_to_le32(SNAP_MAGIC);
|
||||
dh->valid = cpu_to_le32(ps->valid);
|
||||
dh->version = cpu_to_le32(ps->version);
|
||||
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
|
||||
|
||||
return chunk_io(ps, 0, WRITE, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Access functions for the disk exceptions, these do the endian conversions.
|
||||
*/
|
||||
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
|
||||
{
|
||||
BUG_ON(index >= ps->exceptions_per_area);
|
||||
|
||||
return ((struct disk_exception *) ps->area) + index;
|
||||
}
|
||||
|
||||
static void read_exception(struct pstore *ps,
|
||||
uint32_t index, struct disk_exception *result)
|
||||
{
|
||||
struct disk_exception *e = get_exception(ps, index);
|
||||
|
||||
/* copy it */
|
||||
result->old_chunk = le64_to_cpu(e->old_chunk);
|
||||
result->new_chunk = le64_to_cpu(e->new_chunk);
|
||||
}
|
||||
|
||||
static void write_exception(struct pstore *ps,
|
||||
uint32_t index, struct disk_exception *de)
|
||||
{
|
||||
struct disk_exception *e = get_exception(ps, index);
|
||||
|
||||
/* copy it */
|
||||
e->old_chunk = cpu_to_le64(de->old_chunk);
|
||||
e->new_chunk = cpu_to_le64(de->new_chunk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Registers the exceptions that are present in the current area.
|
||||
* 'full' is filled in to indicate if the area has been
|
||||
* filled.
|
||||
*/
|
||||
static int insert_exceptions(struct pstore *ps, int *full)
|
||||
{
|
||||
int r;
|
||||
unsigned int i;
|
||||
struct disk_exception de;
|
||||
|
||||
/* presume the area is full */
|
||||
*full = 1;
|
||||
|
||||
for (i = 0; i < ps->exceptions_per_area; i++) {
|
||||
read_exception(ps, i, &de);
|
||||
|
||||
/*
|
||||
* If the new_chunk is pointing at the start of
|
||||
* the COW device, where the first metadata area
|
||||
* is we know that we've hit the end of the
|
||||
* exceptions. Therefore the area is not full.
|
||||
*/
|
||||
if (de.new_chunk == 0LL) {
|
||||
ps->current_committed = i;
|
||||
*full = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep track of the start of the free chunks.
|
||||
*/
|
||||
if (ps->next_free <= de.new_chunk)
|
||||
ps->next_free = de.new_chunk + 1;
|
||||
|
||||
/*
|
||||
* Otherwise we add the exception to the snapshot.
|
||||
*/
|
||||
r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_exceptions(struct pstore *ps)
|
||||
{
|
||||
int r, full = 1;
|
||||
|
||||
/*
|
||||
* Keeping reading chunks and inserting exceptions until
|
||||
* we find a partially full area.
|
||||
*/
|
||||
for (ps->current_area = 0; full; ps->current_area++) {
|
||||
r = area_io(ps, READ);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = insert_exceptions(ps, &full);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
ps->current_area--;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pstore *get_info(struct exception_store *store)
|
||||
{
|
||||
return (struct pstore *) store->context;
|
||||
}
|
||||
|
||||
static void persistent_fraction_full(struct exception_store *store,
|
||||
sector_t *numerator, sector_t *denominator)
|
||||
{
|
||||
*numerator = get_info(store)->next_free * store->snap->chunk_size;
|
||||
*denominator = get_dev_size(store->snap->cow->bdev);
|
||||
}
|
||||
|
||||
static void persistent_destroy(struct exception_store *store)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
destroy_workqueue(ps->metadata_wq);
|
||||
dm_io_client_destroy(ps->io_client);
|
||||
vfree(ps->callbacks);
|
||||
free_area(ps);
|
||||
kfree(ps);
|
||||
}
|
||||
|
||||
static int persistent_read_metadata(struct exception_store *store)
|
||||
{
|
||||
int r, uninitialized_var(new_snapshot);
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
/*
|
||||
* Read the snapshot header.
|
||||
*/
|
||||
r = read_header(ps, &new_snapshot);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/*
|
||||
* Now we know correct chunk_size, complete the initialisation.
|
||||
*/
|
||||
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
|
||||
sizeof(struct disk_exception);
|
||||
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
|
||||
sizeof(*ps->callbacks));
|
||||
if (!ps->callbacks)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Do we need to setup a new snapshot ?
|
||||
*/
|
||||
if (new_snapshot) {
|
||||
r = write_header(ps);
|
||||
if (r) {
|
||||
DMWARN("write_header failed");
|
||||
return r;
|
||||
}
|
||||
|
||||
ps->current_area = 0;
|
||||
zero_memory_area(ps);
|
||||
r = zero_disk_area(ps, 0);
|
||||
if (r) {
|
||||
DMWARN("zero_disk_area(0) failed");
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Sanity checks.
|
||||
*/
|
||||
if (ps->version != SNAPSHOT_DISK_VERSION) {
|
||||
DMWARN("unable to handle snapshot disk version %d",
|
||||
ps->version);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Metadata are valid, but snapshot is invalidated
|
||||
*/
|
||||
if (!ps->valid)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Read the metadata.
|
||||
*/
|
||||
r = read_exceptions(ps);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int persistent_prepare(struct exception_store *store,
|
||||
struct dm_snap_exception *e)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
uint32_t stride;
|
||||
chunk_t next_free;
|
||||
sector_t size = get_dev_size(store->snap->cow->bdev);
|
||||
|
||||
/* Is there enough room ? */
|
||||
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
|
||||
return -ENOSPC;
|
||||
|
||||
e->new_chunk = ps->next_free;
|
||||
|
||||
/*
|
||||
* Move onto the next free pending, making sure to take
|
||||
* into account the location of the metadata chunks.
|
||||
*/
|
||||
stride = (ps->exceptions_per_area + 1);
|
||||
next_free = ++ps->next_free;
|
||||
if (sector_div(next_free, stride) == 1)
|
||||
ps->next_free++;
|
||||
|
||||
atomic_inc(&ps->pending_count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void persistent_commit(struct exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context)
|
||||
{
|
||||
unsigned int i;
|
||||
struct pstore *ps = get_info(store);
|
||||
struct disk_exception de;
|
||||
struct commit_callback *cb;
|
||||
|
||||
de.old_chunk = e->old_chunk;
|
||||
de.new_chunk = e->new_chunk;
|
||||
write_exception(ps, ps->current_committed++, &de);
|
||||
|
||||
/*
|
||||
* Add the callback to the back of the array. This code
|
||||
* is the only place where the callback array is
|
||||
* manipulated, and we know that it will never be called
|
||||
* multiple times concurrently.
|
||||
*/
|
||||
cb = ps->callbacks + ps->callback_count++;
|
||||
cb->callback = callback;
|
||||
cb->context = callback_context;
|
||||
|
||||
/*
|
||||
* If there are exceptions in flight and we have not yet
|
||||
* filled this metadata area there's nothing more to do.
|
||||
*/
|
||||
if (!atomic_dec_and_test(&ps->pending_count) &&
|
||||
(ps->current_committed != ps->exceptions_per_area))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we completely filled the current area, then wipe the next one.
|
||||
*/
|
||||
if ((ps->current_committed == ps->exceptions_per_area) &&
|
||||
zero_disk_area(ps, ps->current_area + 1))
|
||||
ps->valid = 0;
|
||||
|
||||
/*
|
||||
* Commit exceptions to disk.
|
||||
*/
|
||||
if (ps->valid && area_io(ps, WRITE))
|
||||
ps->valid = 0;
|
||||
|
||||
/*
|
||||
* Advance to the next area if this one is full.
|
||||
*/
|
||||
if (ps->current_committed == ps->exceptions_per_area) {
|
||||
ps->current_committed = 0;
|
||||
ps->current_area++;
|
||||
zero_memory_area(ps);
|
||||
}
|
||||
|
||||
for (i = 0; i < ps->callback_count; i++) {
|
||||
cb = ps->callbacks + i;
|
||||
cb->callback(cb->context, ps->valid);
|
||||
}
|
||||
|
||||
ps->callback_count = 0;
|
||||
}
|
||||
|
||||
static void persistent_drop(struct exception_store *store)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
ps->valid = 0;
|
||||
if (write_header(ps))
|
||||
DMWARN("write header failed");
|
||||
}
|
||||
|
||||
int dm_create_persistent(struct exception_store *store)
|
||||
{
|
||||
struct pstore *ps;
|
||||
|
||||
/* allocate the pstore */
|
||||
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
return -ENOMEM;
|
||||
|
||||
ps->snap = store->snap;
|
||||
ps->valid = 1;
|
||||
ps->version = SNAPSHOT_DISK_VERSION;
|
||||
ps->area = NULL;
|
||||
ps->next_free = 2; /* skipping the header and first area */
|
||||
ps->current_committed = 0;
|
||||
|
||||
ps->callback_count = 0;
|
||||
atomic_set(&ps->pending_count, 0);
|
||||
ps->callbacks = NULL;
|
||||
|
||||
ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
|
||||
if (!ps->metadata_wq) {
|
||||
kfree(ps);
|
||||
DMERR("couldn't start header metadata update thread");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
store->destroy = persistent_destroy;
|
||||
store->read_metadata = persistent_read_metadata;
|
||||
store->prepare_exception = persistent_prepare;
|
||||
store->commit_exception = persistent_commit;
|
||||
store->drop_snapshot = persistent_drop;
|
||||
store->fraction_full = persistent_fraction_full;
|
||||
store->context = ps;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Implementation of the store for non-persistent snapshots.
|
||||
*---------------------------------------------------------------*/
|
||||
struct transient_c {
|
||||
sector_t next_free;
|
||||
};
|
||||
|
||||
static void transient_destroy(struct exception_store *store)
|
||||
{
|
||||
kfree(store->context);
|
||||
}
|
||||
|
||||
static int transient_read_metadata(struct exception_store *store)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int transient_prepare(struct exception_store *store,
|
||||
struct dm_snap_exception *e)
|
||||
{
|
||||
struct transient_c *tc = (struct transient_c *) store->context;
|
||||
sector_t size = get_dev_size(store->snap->cow->bdev);
|
||||
|
||||
if (size < (tc->next_free + store->snap->chunk_size))
|
||||
return -1;
|
||||
|
||||
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
|
||||
tc->next_free += store->snap->chunk_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void transient_commit(struct exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context)
|
||||
{
|
||||
/* Just succeed */
|
||||
callback(callback_context, 1);
|
||||
}
|
||||
|
||||
static void transient_fraction_full(struct exception_store *store,
|
||||
sector_t *numerator, sector_t *denominator)
|
||||
{
|
||||
*numerator = ((struct transient_c *) store->context)->next_free;
|
||||
*denominator = get_dev_size(store->snap->cow->bdev);
|
||||
}
|
||||
|
||||
int dm_create_transient(struct exception_store *store)
|
||||
{
|
||||
struct transient_c *tc;
|
||||
|
||||
store->destroy = transient_destroy;
|
||||
store->read_metadata = transient_read_metadata;
|
||||
store->prepare_exception = transient_prepare;
|
||||
store->commit_exception = transient_commit;
|
||||
store->drop_snapshot = NULL;
|
||||
store->fraction_full = transient_fraction_full;
|
||||
|
||||
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
|
||||
if (!tc)
|
||||
return -ENOMEM;
|
||||
|
||||
tc->next_free = 0;
|
||||
store->context = tc;
|
||||
|
||||
return 0;
|
||||
dm_persistent_snapshot_exit();
|
||||
dm_transient_snapshot_exit();
|
||||
}
|
||||
|
|
148
drivers/md/dm-exception-store.h
Normal file
148
drivers/md/dm-exception-store.h
Normal file
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2008 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* Device-mapper snapshot exception store.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_DM_EXCEPTION_STORE
|
||||
#define _LINUX_DM_EXCEPTION_STORE
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/device-mapper.h>
|
||||
|
||||
/*
|
||||
* The snapshot code deals with largish chunks of the disk at a
|
||||
* time. Typically 32k - 512k.
|
||||
*/
|
||||
typedef sector_t chunk_t;
|
||||
|
||||
/*
|
||||
* An exception is used where an old chunk of data has been
|
||||
* replaced by a new one.
|
||||
* If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
|
||||
* of chunks that follow contiguously. Remaining bits hold the number of the
|
||||
* chunk within the device.
|
||||
*/
|
||||
struct dm_snap_exception {
|
||||
struct list_head hash_list;
|
||||
|
||||
chunk_t old_chunk;
|
||||
chunk_t new_chunk;
|
||||
};
|
||||
|
||||
/*
|
||||
* Abstraction to handle the meta/layout of exception stores (the
|
||||
* COW device).
|
||||
*/
|
||||
struct dm_exception_store {
|
||||
/*
|
||||
* Destroys this object when you've finished with it.
|
||||
*/
|
||||
void (*destroy) (struct dm_exception_store *store);
|
||||
|
||||
/*
|
||||
* The target shouldn't read the COW device until this is
|
||||
* called. As exceptions are read from the COW, they are
|
||||
* reported back via the callback.
|
||||
*/
|
||||
int (*read_metadata) (struct dm_exception_store *store,
|
||||
int (*callback)(void *callback_context,
|
||||
chunk_t old, chunk_t new),
|
||||
void *callback_context);
|
||||
|
||||
/*
|
||||
* Find somewhere to store the next exception.
|
||||
*/
|
||||
int (*prepare_exception) (struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e);
|
||||
|
||||
/*
|
||||
* Update the metadata with this exception.
|
||||
*/
|
||||
void (*commit_exception) (struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context);
|
||||
|
||||
/*
|
||||
* The snapshot is invalid, note this in the metadata.
|
||||
*/
|
||||
void (*drop_snapshot) (struct dm_exception_store *store);
|
||||
|
||||
int (*status) (struct dm_exception_store *store, status_type_t status,
|
||||
char *result, unsigned int maxlen);
|
||||
|
||||
/*
|
||||
* Return how full the snapshot is.
|
||||
*/
|
||||
void (*fraction_full) (struct dm_exception_store *store,
|
||||
sector_t *numerator,
|
||||
sector_t *denominator);
|
||||
|
||||
struct dm_snapshot *snap;
|
||||
void *context;
|
||||
};
|
||||
|
||||
/*
|
||||
* Funtions to manipulate consecutive chunks
|
||||
*/
|
||||
# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
|
||||
# define DM_CHUNK_CONSECUTIVE_BITS 8
|
||||
# define DM_CHUNK_NUMBER_BITS 56
|
||||
|
||||
static inline chunk_t dm_chunk_number(chunk_t chunk)
|
||||
{
|
||||
return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
|
||||
}
|
||||
|
||||
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
|
||||
{
|
||||
return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
|
||||
}
|
||||
|
||||
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
|
||||
{
|
||||
e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
|
||||
|
||||
BUG_ON(!dm_consecutive_chunk_count(e));
|
||||
}
|
||||
|
||||
# else
|
||||
# define DM_CHUNK_CONSECUTIVE_BITS 0
|
||||
|
||||
static inline chunk_t dm_chunk_number(chunk_t chunk)
|
||||
{
|
||||
return chunk;
|
||||
}
|
||||
|
||||
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
|
||||
{
|
||||
}
|
||||
|
||||
# endif
|
||||
|
||||
int dm_exception_store_init(void);
|
||||
void dm_exception_store_exit(void);
|
||||
|
||||
/*
|
||||
* Two exception store implementations.
|
||||
*/
|
||||
int dm_persistent_snapshot_init(void);
|
||||
void dm_persistent_snapshot_exit(void);
|
||||
|
||||
int dm_transient_snapshot_init(void);
|
||||
void dm_transient_snapshot_exit(void);
|
||||
|
||||
int dm_create_persistent(struct dm_exception_store *store);
|
||||
|
||||
int dm_create_transient(struct dm_exception_store *store);
|
||||
|
||||
#endif /* _LINUX_DM_EXCEPTION_STORE */
|
|
@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
|
|||
}
|
||||
|
||||
if (hc->new_map)
|
||||
dm_table_put(hc->new_map);
|
||||
dm_table_destroy(hc->new_map);
|
||||
dm_put(hc->md);
|
||||
free_cell(hc);
|
||||
}
|
||||
|
@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
|
|||
|
||||
r = dm_swap_table(md, new_map);
|
||||
if (r) {
|
||||
dm_table_destroy(new_map);
|
||||
dm_put(md);
|
||||
dm_table_put(new_map);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
|
|||
set_disk_ro(dm_disk(md), 0);
|
||||
else
|
||||
set_disk_ro(dm_disk(md), 1);
|
||||
|
||||
dm_table_put(new_map);
|
||||
}
|
||||
|
||||
if (dm_suspended(md))
|
||||
|
@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
|
|||
}
|
||||
|
||||
if (hc->new_map)
|
||||
dm_table_put(hc->new_map);
|
||||
dm_table_destroy(hc->new_map);
|
||||
hc->new_map = t;
|
||||
up_write(&_hash_lock);
|
||||
|
||||
|
@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
|
|||
}
|
||||
|
||||
if (hc->new_map) {
|
||||
dm_table_put(hc->new_map);
|
||||
dm_table_destroy(hc->new_map);
|
||||
hc->new_map = NULL;
|
||||
}
|
||||
|
||||
|
@ -1550,8 +1548,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
|
|||
goto out;
|
||||
}
|
||||
|
||||
strcpy(name, hc->name);
|
||||
strcpy(uuid, hc->uuid ? : "");
|
||||
if (name)
|
||||
strcpy(name, hc->name);
|
||||
if (uuid)
|
||||
strcpy(uuid, hc->uuid ? : "");
|
||||
|
||||
out:
|
||||
up_read(&_hash_lock);
|
||||
|
|
|
@ -142,6 +142,7 @@ static struct target_type linear_target = {
|
|||
.status = linear_status,
|
||||
.ioctl = linear_ioctl,
|
||||
.merge = linear_merge,
|
||||
.features = DM_TARGET_SUPPORTS_BARRIERS,
|
||||
};
|
||||
|
||||
int __init dm_linear_init(void)
|
||||
|
@ -156,8 +157,5 @@ int __init dm_linear_init(void)
|
|||
|
||||
void dm_linear_exit(void)
|
||||
{
|
||||
int r = dm_unregister_target(&linear_target);
|
||||
|
||||
if (r < 0)
|
||||
DMERR("unregister failed %d", r);
|
||||
dm_unregister_target(&linear_target);
|
||||
}
|
||||
|
|
|
@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
|
|||
static int rw_header(struct log_c *lc, int rw)
|
||||
{
|
||||
lc->io_req.bi_rw = rw;
|
||||
lc->io_req.mem.ptr.vma = lc->disk_header;
|
||||
lc->io_req.notify.fn = NULL;
|
||||
|
||||
return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
|
||||
}
|
||||
|
@ -362,10 +360,15 @@ static int read_header(struct log_c *log)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int write_header(struct log_c *log)
|
||||
static int _check_region_size(struct dm_target *ti, uint32_t region_size)
|
||||
{
|
||||
header_to_disk(&log->header, log->disk_header);
|
||||
return rw_header(log, WRITE);
|
||||
if (region_size < 2 || region_size > ti->len)
|
||||
return 0;
|
||||
|
||||
if (!is_power_of_2(region_size))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
|
@ -403,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
|
|||
}
|
||||
}
|
||||
|
||||
if (sscanf(argv[0], "%u", ®ion_size) != 1) {
|
||||
DMWARN("invalid region size string");
|
||||
if (sscanf(argv[0], "%u", ®ion_size) != 1 ||
|
||||
!_check_region_size(ti, region_size)) {
|
||||
DMWARN("invalid region size %s", argv[0]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -453,8 +457,18 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
|
|||
*/
|
||||
buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
|
||||
bitset_size, ti->limits.hardsect_size);
|
||||
|
||||
if (buf_size > dev->bdev->bd_inode->i_size) {
|
||||
DMWARN("log device %s too small: need %llu bytes",
|
||||
dev->name, (unsigned long long)buf_size);
|
||||
kfree(lc);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lc->header_location.count = buf_size >> SECTOR_SHIFT;
|
||||
|
||||
lc->io_req.mem.type = DM_IO_VMA;
|
||||
lc->io_req.notify.fn = NULL;
|
||||
lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
|
||||
PAGE_SIZE));
|
||||
if (IS_ERR(lc->io_req.client)) {
|
||||
|
@ -467,10 +481,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
|
|||
lc->disk_header = vmalloc(buf_size);
|
||||
if (!lc->disk_header) {
|
||||
DMWARN("couldn't allocate disk log buffer");
|
||||
dm_io_client_destroy(lc->io_req.client);
|
||||
kfree(lc);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
lc->io_req.mem.ptr.vma = lc->disk_header;
|
||||
lc->clean_bits = (void *)lc->disk_header +
|
||||
(LOG_OFFSET << SECTOR_SHIFT);
|
||||
}
|
||||
|
@ -482,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
|
|||
DMWARN("couldn't allocate sync bitset");
|
||||
if (!dev)
|
||||
vfree(lc->clean_bits);
|
||||
else
|
||||
dm_io_client_destroy(lc->io_req.client);
|
||||
vfree(lc->disk_header);
|
||||
kfree(lc);
|
||||
return -ENOMEM;
|
||||
|
@ -495,6 +513,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
|
|||
vfree(lc->sync_bits);
|
||||
if (!dev)
|
||||
vfree(lc->clean_bits);
|
||||
else
|
||||
dm_io_client_destroy(lc->io_req.client);
|
||||
vfree(lc->disk_header);
|
||||
kfree(lc);
|
||||
return -ENOMEM;
|
||||
|
@ -631,8 +651,10 @@ static int disk_resume(struct dm_dirty_log *log)
|
|||
/* set the correct number of regions in the header */
|
||||
lc->header.nr_regions = lc->region_count;
|
||||
|
||||
header_to_disk(&lc->header, lc->disk_header);
|
||||
|
||||
/* write the new header */
|
||||
r = write_header(lc);
|
||||
r = rw_header(lc, WRITE);
|
||||
if (r) {
|
||||
DMWARN("%s: Failed to write header on dirty region log device",
|
||||
lc->log_dev->name);
|
||||
|
@ -682,7 +704,7 @@ static int disk_flush(struct dm_dirty_log *log)
|
|||
if (!lc->touched)
|
||||
return 0;
|
||||
|
||||
r = write_header(lc);
|
||||
r = rw_header(lc, WRITE);
|
||||
if (r)
|
||||
fail_log_device(lc);
|
||||
else
|
||||
|
|
|
@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
|
|||
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
|
||||
pgpath->path.dev->name, m->nr_valid_paths);
|
||||
|
||||
queue_work(kmultipathd, &m->trigger_event);
|
||||
schedule_work(&m->trigger_event);
|
||||
queue_work(kmultipathd, &pgpath->deactivate_path);
|
||||
|
||||
out:
|
||||
|
@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
|
|||
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
|
||||
pgpath->path.dev->name, m->nr_valid_paths);
|
||||
|
||||
queue_work(kmultipathd, &m->trigger_event);
|
||||
schedule_work(&m->trigger_event);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
|
|||
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
queue_work(kmultipathd, &m->trigger_event);
|
||||
schedule_work(&m->trigger_event);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
|
|||
}
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
queue_work(kmultipathd, &m->trigger_event);
|
||||
schedule_work(&m->trigger_event);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
|
|||
|
||||
static void __exit dm_multipath_exit(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
destroy_workqueue(kmpath_handlerd);
|
||||
destroy_workqueue(kmultipathd);
|
||||
|
||||
r = dm_unregister_target(&multipath_target);
|
||||
if (r < 0)
|
||||
DMERR("target unregister failed %d", r);
|
||||
dm_unregister_target(&multipath_target);
|
||||
kmem_cache_destroy(_mpio_cache);
|
||||
}
|
||||
|
||||
|
|
|
@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
|
|||
struct mirror_set *ms = m->ms;
|
||||
struct mirror *new;
|
||||
|
||||
if (!errors_handled(ms))
|
||||
return;
|
||||
|
||||
/*
|
||||
* error_count is used for nothing more than a
|
||||
* simple way to tell if a device has encountered
|
||||
|
@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
|
|||
if (test_and_set_bit(error_type, &m->error_type))
|
||||
return;
|
||||
|
||||
if (!errors_handled(ms))
|
||||
return;
|
||||
|
||||
if (m != get_default_mirror(ms))
|
||||
goto out;
|
||||
|
||||
|
@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
|
|||
kfree(ms);
|
||||
}
|
||||
|
||||
static inline int _check_region_size(struct dm_target *ti, uint32_t size)
|
||||
{
|
||||
return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
|
||||
size > ti->len);
|
||||
}
|
||||
|
||||
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
|
||||
unsigned int mirror, char **argv)
|
||||
{
|
||||
|
@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
|
||||
ti->error = "Invalid region size";
|
||||
dm_dirty_log_destroy(dl);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return dl;
|
||||
}
|
||||
|
||||
|
@ -1300,11 +1288,7 @@ static int __init dm_mirror_init(void)
|
|||
|
||||
static void __exit dm_mirror_exit(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = dm_unregister_target(&mirror_target);
|
||||
if (r < 0)
|
||||
DMERR("unregister failed %d", r);
|
||||
dm_unregister_target(&mirror_target);
|
||||
}
|
||||
|
||||
/* Module hooks */
|
||||
|
|
704
drivers/md/dm-snap-persistent.c
Normal file
704
drivers/md/dm-snap-persistent.c
Normal file
|
@ -0,0 +1,704 @@
|
|||
/*
|
||||
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2006-2008 Red Hat GmbH
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-exception-store.h"
|
||||
#include "dm-snap.h"
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dm-io.h>
|
||||
|
||||
#define DM_MSG_PREFIX "persistent snapshot"
|
||||
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Persistent snapshots, by persistent we mean that the snapshot
|
||||
* will survive a reboot.
|
||||
*---------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* We need to store a record of which parts of the origin have
|
||||
* been copied to the snapshot device. The snapshot code
|
||||
* requires that we copy exception chunks to chunk aligned areas
|
||||
* of the COW store. It makes sense therefore, to store the
|
||||
* metadata in chunk size blocks.
|
||||
*
|
||||
* There is no backward or forward compatibility implemented,
|
||||
* snapshots with different disk versions than the kernel will
|
||||
* not be usable. It is expected that "lvcreate" will blank out
|
||||
* the start of a fresh COW device before calling the snapshot
|
||||
* constructor.
|
||||
*
|
||||
* The first chunk of the COW device just contains the header.
|
||||
* After this there is a chunk filled with exception metadata,
|
||||
* followed by as many exception chunks as can fit in the
|
||||
* metadata areas.
|
||||
*
|
||||
* All on disk structures are in little-endian format. The end
|
||||
* of the exceptions info is indicated by an exception with a
|
||||
* new_chunk of 0, which is invalid since it would point to the
|
||||
* header chunk.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
|
||||
*/
|
||||
#define SNAP_MAGIC 0x70416e53
|
||||
|
||||
/*
|
||||
* The on-disk version of the metadata.
|
||||
*/
|
||||
#define SNAPSHOT_DISK_VERSION 1
|
||||
|
||||
struct disk_header {
|
||||
uint32_t magic;
|
||||
|
||||
/*
|
||||
* Is this snapshot valid. There is no way of recovering
|
||||
* an invalid snapshot.
|
||||
*/
|
||||
uint32_t valid;
|
||||
|
||||
/*
|
||||
* Simple, incrementing version. no backward
|
||||
* compatibility.
|
||||
*/
|
||||
uint32_t version;
|
||||
|
||||
/* In sectors */
|
||||
uint32_t chunk_size;
|
||||
};
|
||||
|
||||
struct disk_exception {
|
||||
uint64_t old_chunk;
|
||||
uint64_t new_chunk;
|
||||
};
|
||||
|
||||
struct commit_callback {
|
||||
void (*callback)(void *, int success);
|
||||
void *context;
|
||||
};
|
||||
|
||||
/*
|
||||
* The top level structure for a persistent exception store.
|
||||
*/
|
||||
struct pstore {
|
||||
struct dm_snapshot *snap; /* up pointer to my snapshot */
|
||||
int version;
|
||||
int valid;
|
||||
uint32_t exceptions_per_area;
|
||||
|
||||
/*
|
||||
* Now that we have an asynchronous kcopyd there is no
|
||||
* need for large chunk sizes, so it wont hurt to have a
|
||||
* whole chunks worth of metadata in memory at once.
|
||||
*/
|
||||
void *area;
|
||||
|
||||
/*
|
||||
* An area of zeros used to clear the next area.
|
||||
*/
|
||||
void *zero_area;
|
||||
|
||||
/*
|
||||
* Used to keep track of which metadata area the data in
|
||||
* 'chunk' refers to.
|
||||
*/
|
||||
chunk_t current_area;
|
||||
|
||||
/*
|
||||
* The next free chunk for an exception.
|
||||
*/
|
||||
chunk_t next_free;
|
||||
|
||||
/*
|
||||
* The index of next free exception in the current
|
||||
* metadata area.
|
||||
*/
|
||||
uint32_t current_committed;
|
||||
|
||||
atomic_t pending_count;
|
||||
uint32_t callback_count;
|
||||
struct commit_callback *callbacks;
|
||||
struct dm_io_client *io_client;
|
||||
|
||||
struct workqueue_struct *metadata_wq;
|
||||
};
|
||||
|
||||
static unsigned sectors_to_pages(unsigned sectors)
|
||||
{
|
||||
return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
|
||||
}
|
||||
|
||||
static int alloc_area(struct pstore *ps)
|
||||
{
|
||||
int r = -ENOMEM;
|
||||
size_t len;
|
||||
|
||||
len = ps->snap->chunk_size << SECTOR_SHIFT;
|
||||
|
||||
/*
|
||||
* Allocate the chunk_size block of memory that will hold
|
||||
* a single metadata area.
|
||||
*/
|
||||
ps->area = vmalloc(len);
|
||||
if (!ps->area)
|
||||
return r;
|
||||
|
||||
ps->zero_area = vmalloc(len);
|
||||
if (!ps->zero_area) {
|
||||
vfree(ps->area);
|
||||
return r;
|
||||
}
|
||||
memset(ps->zero_area, 0, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_area(struct pstore *ps)
|
||||
{
|
||||
vfree(ps->area);
|
||||
ps->area = NULL;
|
||||
vfree(ps->zero_area);
|
||||
ps->zero_area = NULL;
|
||||
}
|
||||
|
||||
struct mdata_req {
|
||||
struct dm_io_region *where;
|
||||
struct dm_io_request *io_req;
|
||||
struct work_struct work;
|
||||
int result;
|
||||
};
|
||||
|
||||
static void do_metadata(struct work_struct *work)
|
||||
{
|
||||
struct mdata_req *req = container_of(work, struct mdata_req, work);
|
||||
|
||||
req->result = dm_io(req->io_req, 1, req->where, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read or write a chunk aligned and sized block of data from a device.
|
||||
*/
|
||||
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
|
||||
{
|
||||
struct dm_io_region where = {
|
||||
.bdev = ps->snap->cow->bdev,
|
||||
.sector = ps->snap->chunk_size * chunk,
|
||||
.count = ps->snap->chunk_size,
|
||||
};
|
||||
struct dm_io_request io_req = {
|
||||
.bi_rw = rw,
|
||||
.mem.type = DM_IO_VMA,
|
||||
.mem.ptr.vma = ps->area,
|
||||
.client = ps->io_client,
|
||||
.notify.fn = NULL,
|
||||
};
|
||||
struct mdata_req req;
|
||||
|
||||
if (!metadata)
|
||||
return dm_io(&io_req, 1, &where, NULL);
|
||||
|
||||
req.where = &where;
|
||||
req.io_req = &io_req;
|
||||
|
||||
/*
|
||||
* Issue the synchronous I/O from a different thread
|
||||
* to avoid generic_make_request recursion.
|
||||
*/
|
||||
INIT_WORK(&req.work, do_metadata);
|
||||
queue_work(ps->metadata_wq, &req.work);
|
||||
flush_workqueue(ps->metadata_wq);
|
||||
|
||||
return req.result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a metadata area index to a chunk index.
|
||||
*/
|
||||
static chunk_t area_location(struct pstore *ps, chunk_t area)
|
||||
{
|
||||
return 1 + ((ps->exceptions_per_area + 1) * area);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read or write a metadata area. Remembering to skip the first
|
||||
* chunk which holds the header.
|
||||
*/
|
||||
static int area_io(struct pstore *ps, int rw)
|
||||
{
|
||||
int r;
|
||||
chunk_t chunk;
|
||||
|
||||
chunk = area_location(ps, ps->current_area);
|
||||
|
||||
r = chunk_io(ps, chunk, rw, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void zero_memory_area(struct pstore *ps)
|
||||
{
|
||||
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static int zero_disk_area(struct pstore *ps, chunk_t area)
|
||||
{
|
||||
struct dm_io_region where = {
|
||||
.bdev = ps->snap->cow->bdev,
|
||||
.sector = ps->snap->chunk_size * area_location(ps, area),
|
||||
.count = ps->snap->chunk_size,
|
||||
};
|
||||
struct dm_io_request io_req = {
|
||||
.bi_rw = WRITE,
|
||||
.mem.type = DM_IO_VMA,
|
||||
.mem.ptr.vma = ps->zero_area,
|
||||
.client = ps->io_client,
|
||||
.notify.fn = NULL,
|
||||
};
|
||||
|
||||
return dm_io(&io_req, 1, &where, NULL);
|
||||
}
|
||||
|
||||
static int read_header(struct pstore *ps, int *new_snapshot)
|
||||
{
|
||||
int r;
|
||||
struct disk_header *dh;
|
||||
chunk_t chunk_size;
|
||||
int chunk_size_supplied = 1;
|
||||
|
||||
/*
|
||||
* Use default chunk size (or hardsect_size, if larger) if none supplied
|
||||
*/
|
||||
if (!ps->snap->chunk_size) {
|
||||
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
|
||||
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
|
||||
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
|
||||
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
|
||||
chunk_size_supplied = 0;
|
||||
}
|
||||
|
||||
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
|
||||
chunk_size));
|
||||
if (IS_ERR(ps->io_client))
|
||||
return PTR_ERR(ps->io_client);
|
||||
|
||||
r = alloc_area(ps);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = chunk_io(ps, 0, READ, 1);
|
||||
if (r)
|
||||
goto bad;
|
||||
|
||||
dh = (struct disk_header *) ps->area;
|
||||
|
||||
if (le32_to_cpu(dh->magic) == 0) {
|
||||
*new_snapshot = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
|
||||
DMWARN("Invalid or corrupt snapshot");
|
||||
r = -ENXIO;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
*new_snapshot = 0;
|
||||
ps->valid = le32_to_cpu(dh->valid);
|
||||
ps->version = le32_to_cpu(dh->version);
|
||||
chunk_size = le32_to_cpu(dh->chunk_size);
|
||||
|
||||
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
|
||||
return 0;
|
||||
|
||||
DMWARN("chunk size %llu in device metadata overrides "
|
||||
"table chunk size of %llu.",
|
||||
(unsigned long long)chunk_size,
|
||||
(unsigned long long)ps->snap->chunk_size);
|
||||
|
||||
/* We had a bogus chunk_size. Fix stuff up. */
|
||||
free_area(ps);
|
||||
|
||||
ps->snap->chunk_size = chunk_size;
|
||||
ps->snap->chunk_mask = chunk_size - 1;
|
||||
ps->snap->chunk_shift = ffs(chunk_size) - 1;
|
||||
|
||||
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
|
||||
ps->io_client);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = alloc_area(ps);
|
||||
return r;
|
||||
|
||||
bad:
|
||||
free_area(ps);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int write_header(struct pstore *ps)
|
||||
{
|
||||
struct disk_header *dh;
|
||||
|
||||
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
|
||||
|
||||
dh = (struct disk_header *) ps->area;
|
||||
dh->magic = cpu_to_le32(SNAP_MAGIC);
|
||||
dh->valid = cpu_to_le32(ps->valid);
|
||||
dh->version = cpu_to_le32(ps->version);
|
||||
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
|
||||
|
||||
return chunk_io(ps, 0, WRITE, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Access functions for the disk exceptions, these do the endian conversions.
|
||||
*/
|
||||
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
|
||||
{
|
||||
BUG_ON(index >= ps->exceptions_per_area);
|
||||
|
||||
return ((struct disk_exception *) ps->area) + index;
|
||||
}
|
||||
|
||||
static void read_exception(struct pstore *ps,
|
||||
uint32_t index, struct disk_exception *result)
|
||||
{
|
||||
struct disk_exception *e = get_exception(ps, index);
|
||||
|
||||
/* copy it */
|
||||
result->old_chunk = le64_to_cpu(e->old_chunk);
|
||||
result->new_chunk = le64_to_cpu(e->new_chunk);
|
||||
}
|
||||
|
||||
static void write_exception(struct pstore *ps,
|
||||
uint32_t index, struct disk_exception *de)
|
||||
{
|
||||
struct disk_exception *e = get_exception(ps, index);
|
||||
|
||||
/* copy it */
|
||||
e->old_chunk = cpu_to_le64(de->old_chunk);
|
||||
e->new_chunk = cpu_to_le64(de->new_chunk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Registers the exceptions that are present in the current area.
|
||||
* 'full' is filled in to indicate if the area has been
|
||||
* filled.
|
||||
*/
|
||||
static int insert_exceptions(struct pstore *ps,
|
||||
int (*callback)(void *callback_context,
|
||||
chunk_t old, chunk_t new),
|
||||
void *callback_context,
|
||||
int *full)
|
||||
{
|
||||
int r;
|
||||
unsigned int i;
|
||||
struct disk_exception de;
|
||||
|
||||
/* presume the area is full */
|
||||
*full = 1;
|
||||
|
||||
for (i = 0; i < ps->exceptions_per_area; i++) {
|
||||
read_exception(ps, i, &de);
|
||||
|
||||
/*
|
||||
* If the new_chunk is pointing at the start of
|
||||
* the COW device, where the first metadata area
|
||||
* is we know that we've hit the end of the
|
||||
* exceptions. Therefore the area is not full.
|
||||
*/
|
||||
if (de.new_chunk == 0LL) {
|
||||
ps->current_committed = i;
|
||||
*full = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep track of the start of the free chunks.
|
||||
*/
|
||||
if (ps->next_free <= de.new_chunk)
|
||||
ps->next_free = de.new_chunk + 1;
|
||||
|
||||
/*
|
||||
* Otherwise we add the exception to the snapshot.
|
||||
*/
|
||||
r = callback(callback_context, de.old_chunk, de.new_chunk);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_exceptions(struct pstore *ps,
|
||||
int (*callback)(void *callback_context, chunk_t old,
|
||||
chunk_t new),
|
||||
void *callback_context)
|
||||
{
|
||||
int r, full = 1;
|
||||
|
||||
/*
|
||||
* Keeping reading chunks and inserting exceptions until
|
||||
* we find a partially full area.
|
||||
*/
|
||||
for (ps->current_area = 0; full; ps->current_area++) {
|
||||
r = area_io(ps, READ);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = insert_exceptions(ps, callback, callback_context, &full);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
ps->current_area--;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pstore *get_info(struct dm_exception_store *store)
|
||||
{
|
||||
return (struct pstore *) store->context;
|
||||
}
|
||||
|
||||
static void persistent_fraction_full(struct dm_exception_store *store,
|
||||
sector_t *numerator, sector_t *denominator)
|
||||
{
|
||||
*numerator = get_info(store)->next_free * store->snap->chunk_size;
|
||||
*denominator = get_dev_size(store->snap->cow->bdev);
|
||||
}
|
||||
|
||||
static void persistent_destroy(struct dm_exception_store *store)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
destroy_workqueue(ps->metadata_wq);
|
||||
dm_io_client_destroy(ps->io_client);
|
||||
vfree(ps->callbacks);
|
||||
free_area(ps);
|
||||
kfree(ps);
|
||||
}
|
||||
|
||||
static int persistent_read_metadata(struct dm_exception_store *store,
|
||||
int (*callback)(void *callback_context,
|
||||
chunk_t old, chunk_t new),
|
||||
void *callback_context)
|
||||
{
|
||||
int r, uninitialized_var(new_snapshot);
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
/*
|
||||
* Read the snapshot header.
|
||||
*/
|
||||
r = read_header(ps, &new_snapshot);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/*
|
||||
* Now we know correct chunk_size, complete the initialisation.
|
||||
*/
|
||||
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
|
||||
sizeof(struct disk_exception);
|
||||
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
|
||||
sizeof(*ps->callbacks));
|
||||
if (!ps->callbacks)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Do we need to setup a new snapshot ?
|
||||
*/
|
||||
if (new_snapshot) {
|
||||
r = write_header(ps);
|
||||
if (r) {
|
||||
DMWARN("write_header failed");
|
||||
return r;
|
||||
}
|
||||
|
||||
ps->current_area = 0;
|
||||
zero_memory_area(ps);
|
||||
r = zero_disk_area(ps, 0);
|
||||
if (r) {
|
||||
DMWARN("zero_disk_area(0) failed");
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Sanity checks.
|
||||
*/
|
||||
if (ps->version != SNAPSHOT_DISK_VERSION) {
|
||||
DMWARN("unable to handle snapshot disk version %d",
|
||||
ps->version);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Metadata are valid, but snapshot is invalidated
|
||||
*/
|
||||
if (!ps->valid)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Read the metadata.
|
||||
*/
|
||||
r = read_exceptions(ps, callback, callback_context);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int persistent_prepare_exception(struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
uint32_t stride;
|
||||
chunk_t next_free;
|
||||
sector_t size = get_dev_size(store->snap->cow->bdev);
|
||||
|
||||
/* Is there enough room ? */
|
||||
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
|
||||
return -ENOSPC;
|
||||
|
||||
e->new_chunk = ps->next_free;
|
||||
|
||||
/*
|
||||
* Move onto the next free pending, making sure to take
|
||||
* into account the location of the metadata chunks.
|
||||
*/
|
||||
stride = (ps->exceptions_per_area + 1);
|
||||
next_free = ++ps->next_free;
|
||||
if (sector_div(next_free, stride) == 1)
|
||||
ps->next_free++;
|
||||
|
||||
atomic_inc(&ps->pending_count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void persistent_commit_exception(struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context)
|
||||
{
|
||||
unsigned int i;
|
||||
struct pstore *ps = get_info(store);
|
||||
struct disk_exception de;
|
||||
struct commit_callback *cb;
|
||||
|
||||
de.old_chunk = e->old_chunk;
|
||||
de.new_chunk = e->new_chunk;
|
||||
write_exception(ps, ps->current_committed++, &de);
|
||||
|
||||
/*
|
||||
* Add the callback to the back of the array. This code
|
||||
* is the only place where the callback array is
|
||||
* manipulated, and we know that it will never be called
|
||||
* multiple times concurrently.
|
||||
*/
|
||||
cb = ps->callbacks + ps->callback_count++;
|
||||
cb->callback = callback;
|
||||
cb->context = callback_context;
|
||||
|
||||
/*
|
||||
* If there are exceptions in flight and we have not yet
|
||||
* filled this metadata area there's nothing more to do.
|
||||
*/
|
||||
if (!atomic_dec_and_test(&ps->pending_count) &&
|
||||
(ps->current_committed != ps->exceptions_per_area))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we completely filled the current area, then wipe the next one.
|
||||
*/
|
||||
if ((ps->current_committed == ps->exceptions_per_area) &&
|
||||
zero_disk_area(ps, ps->current_area + 1))
|
||||
ps->valid = 0;
|
||||
|
||||
/*
|
||||
* Commit exceptions to disk.
|
||||
*/
|
||||
if (ps->valid && area_io(ps, WRITE))
|
||||
ps->valid = 0;
|
||||
|
||||
/*
|
||||
* Advance to the next area if this one is full.
|
||||
*/
|
||||
if (ps->current_committed == ps->exceptions_per_area) {
|
||||
ps->current_committed = 0;
|
||||
ps->current_area++;
|
||||
zero_memory_area(ps);
|
||||
}
|
||||
|
||||
for (i = 0; i < ps->callback_count; i++) {
|
||||
cb = ps->callbacks + i;
|
||||
cb->callback(cb->context, ps->valid);
|
||||
}
|
||||
|
||||
ps->callback_count = 0;
|
||||
}
|
||||
|
||||
static void persistent_drop_snapshot(struct dm_exception_store *store)
|
||||
{
|
||||
struct pstore *ps = get_info(store);
|
||||
|
||||
ps->valid = 0;
|
||||
if (write_header(ps))
|
||||
DMWARN("write header failed");
|
||||
}
|
||||
|
||||
int dm_create_persistent(struct dm_exception_store *store)
|
||||
{
|
||||
struct pstore *ps;
|
||||
|
||||
/* allocate the pstore */
|
||||
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
return -ENOMEM;
|
||||
|
||||
ps->snap = store->snap;
|
||||
ps->valid = 1;
|
||||
ps->version = SNAPSHOT_DISK_VERSION;
|
||||
ps->area = NULL;
|
||||
ps->next_free = 2; /* skipping the header and first area */
|
||||
ps->current_committed = 0;
|
||||
|
||||
ps->callback_count = 0;
|
||||
atomic_set(&ps->pending_count, 0);
|
||||
ps->callbacks = NULL;
|
||||
|
||||
ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
|
||||
if (!ps->metadata_wq) {
|
||||
kfree(ps);
|
||||
DMERR("couldn't start header metadata update thread");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
store->destroy = persistent_destroy;
|
||||
store->read_metadata = persistent_read_metadata;
|
||||
store->prepare_exception = persistent_prepare_exception;
|
||||
store->commit_exception = persistent_commit_exception;
|
||||
store->drop_snapshot = persistent_drop_snapshot;
|
||||
store->fraction_full = persistent_fraction_full;
|
||||
store->context = ps;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dm_persistent_snapshot_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dm_persistent_snapshot_exit(void)
|
||||
{
|
||||
}
|
98
drivers/md/dm-snap-transient.c
Normal file
98
drivers/md/dm-snap-transient.c
Normal file
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2006-2008 Red Hat GmbH
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-exception-store.h"
|
||||
#include "dm-snap.h"
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dm-io.h>
|
||||
|
||||
#define DM_MSG_PREFIX "transient snapshot"
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Implementation of the store for non-persistent snapshots.
|
||||
*---------------------------------------------------------------*/
|
||||
struct transient_c {
|
||||
sector_t next_free;
|
||||
};
|
||||
|
||||
static void transient_destroy(struct dm_exception_store *store)
|
||||
{
|
||||
kfree(store->context);
|
||||
}
|
||||
|
||||
static int transient_read_metadata(struct dm_exception_store *store,
|
||||
int (*callback)(void *callback_context,
|
||||
chunk_t old, chunk_t new),
|
||||
void *callback_context)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int transient_prepare_exception(struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e)
|
||||
{
|
||||
struct transient_c *tc = (struct transient_c *) store->context;
|
||||
sector_t size = get_dev_size(store->snap->cow->bdev);
|
||||
|
||||
if (size < (tc->next_free + store->snap->chunk_size))
|
||||
return -1;
|
||||
|
||||
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
|
||||
tc->next_free += store->snap->chunk_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void transient_commit_exception(struct dm_exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context)
|
||||
{
|
||||
/* Just succeed */
|
||||
callback(callback_context, 1);
|
||||
}
|
||||
|
||||
static void transient_fraction_full(struct dm_exception_store *store,
|
||||
sector_t *numerator, sector_t *denominator)
|
||||
{
|
||||
*numerator = ((struct transient_c *) store->context)->next_free;
|
||||
*denominator = get_dev_size(store->snap->cow->bdev);
|
||||
}
|
||||
|
||||
int dm_create_transient(struct dm_exception_store *store)
|
||||
{
|
||||
struct transient_c *tc;
|
||||
|
||||
store->destroy = transient_destroy;
|
||||
store->read_metadata = transient_read_metadata;
|
||||
store->prepare_exception = transient_prepare_exception;
|
||||
store->commit_exception = transient_commit_exception;
|
||||
store->drop_snapshot = NULL;
|
||||
store->fraction_full = transient_fraction_full;
|
||||
|
||||
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
|
||||
if (!tc)
|
||||
return -ENOMEM;
|
||||
|
||||
tc->next_free = 0;
|
||||
store->context = tc;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dm_transient_snapshot_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dm_transient_snapshot_exit(void)
|
||||
{
|
||||
}
|
|
@ -9,6 +9,7 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/device-mapper.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kdev_t.h>
|
||||
|
@ -20,6 +21,7 @@
|
|||
#include <linux/log2.h>
|
||||
#include <linux/dm-kcopyd.h>
|
||||
|
||||
#include "dm-exception-store.h"
|
||||
#include "dm-snap.h"
|
||||
#include "dm-bio-list.h"
|
||||
|
||||
|
@ -428,8 +430,13 @@ static void insert_completed_exception(struct dm_snapshot *s,
|
|||
list_add(&new_e->hash_list, e ? &e->hash_list : l);
|
||||
}
|
||||
|
||||
int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
|
||||
/*
|
||||
* Callback used by the exception stores to load exceptions when
|
||||
* initialising.
|
||||
*/
|
||||
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
|
||||
{
|
||||
struct dm_snapshot *s = context;
|
||||
struct dm_snap_exception *e;
|
||||
|
||||
e = alloc_exception();
|
||||
|
@ -658,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
spin_lock_init(&s->tracked_chunk_lock);
|
||||
|
||||
/* Metadata must only be loaded into one table at once */
|
||||
r = s->store.read_metadata(&s->store);
|
||||
r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
|
||||
if (r < 0) {
|
||||
ti->error = "Failed to read snapshot metadata";
|
||||
goto bad_load_and_register;
|
||||
|
@ -735,7 +742,7 @@ static void snapshot_dtr(struct dm_target *ti)
|
|||
unregister_snapshot(s);
|
||||
|
||||
while (atomic_read(&s->pending_exceptions_count))
|
||||
yield();
|
||||
msleep(1);
|
||||
/*
|
||||
* Ensure instructions in mempool_destroy aren't reordered
|
||||
* before atomic_read.
|
||||
|
@ -888,10 +895,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
|
|||
|
||||
/*
|
||||
* Check for conflicting reads. This is extremely improbable,
|
||||
* so yield() is sufficient and there is no need for a wait queue.
|
||||
* so msleep(1) is sufficient and there is no need for a wait queue.
|
||||
*/
|
||||
while (__chunk_is_tracked(s, pe->e.old_chunk))
|
||||
yield();
|
||||
msleep(1);
|
||||
|
||||
/*
|
||||
* Add a proper exception, and remove the
|
||||
|
@ -1404,6 +1411,12 @@ static int __init dm_snapshot_init(void)
|
|||
{
|
||||
int r;
|
||||
|
||||
r = dm_exception_store_init();
|
||||
if (r) {
|
||||
DMERR("Failed to initialize exception stores");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_register_target(&snapshot_target);
|
||||
if (r) {
|
||||
DMERR("snapshot target register failed %d", r);
|
||||
|
@ -1452,39 +1465,34 @@ static int __init dm_snapshot_init(void)
|
|||
|
||||
return 0;
|
||||
|
||||
bad_pending_pool:
|
||||
bad_pending_pool:
|
||||
kmem_cache_destroy(tracked_chunk_cache);
|
||||
bad5:
|
||||
bad5:
|
||||
kmem_cache_destroy(pending_cache);
|
||||
bad4:
|
||||
bad4:
|
||||
kmem_cache_destroy(exception_cache);
|
||||
bad3:
|
||||
bad3:
|
||||
exit_origin_hash();
|
||||
bad2:
|
||||
bad2:
|
||||
dm_unregister_target(&origin_target);
|
||||
bad1:
|
||||
bad1:
|
||||
dm_unregister_target(&snapshot_target);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __exit dm_snapshot_exit(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
destroy_workqueue(ksnapd);
|
||||
|
||||
r = dm_unregister_target(&snapshot_target);
|
||||
if (r)
|
||||
DMERR("snapshot unregister failed %d", r);
|
||||
|
||||
r = dm_unregister_target(&origin_target);
|
||||
if (r)
|
||||
DMERR("origin unregister failed %d", r);
|
||||
dm_unregister_target(&snapshot_target);
|
||||
dm_unregister_target(&origin_target);
|
||||
|
||||
exit_origin_hash();
|
||||
kmem_cache_destroy(pending_cache);
|
||||
kmem_cache_destroy(exception_cache);
|
||||
kmem_cache_destroy(tracked_chunk_cache);
|
||||
|
||||
dm_exception_store_exit();
|
||||
}
|
||||
|
||||
/* Module hooks */
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
/*
|
||||
* dm-snapshot.c
|
||||
*
|
||||
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
|
@ -10,6 +8,7 @@
|
|||
#define DM_SNAPSHOT_H
|
||||
|
||||
#include <linux/device-mapper.h>
|
||||
#include "dm-exception-store.h"
|
||||
#include "dm-bio-list.h"
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
@ -20,116 +19,6 @@ struct exception_table {
|
|||
struct list_head *table;
|
||||
};
|
||||
|
||||
/*
|
||||
* The snapshot code deals with largish chunks of the disk at a
|
||||
* time. Typically 32k - 512k.
|
||||
*/
|
||||
typedef sector_t chunk_t;
|
||||
|
||||
/*
|
||||
* An exception is used where an old chunk of data has been
|
||||
* replaced by a new one.
|
||||
* If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
|
||||
* of chunks that follow contiguously. Remaining bits hold the number of the
|
||||
* chunk within the device.
|
||||
*/
|
||||
struct dm_snap_exception {
|
||||
struct list_head hash_list;
|
||||
|
||||
chunk_t old_chunk;
|
||||
chunk_t new_chunk;
|
||||
};
|
||||
|
||||
/*
|
||||
* Funtions to manipulate consecutive chunks
|
||||
*/
|
||||
# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
|
||||
# define DM_CHUNK_CONSECUTIVE_BITS 8
|
||||
# define DM_CHUNK_NUMBER_BITS 56
|
||||
|
||||
static inline chunk_t dm_chunk_number(chunk_t chunk)
|
||||
{
|
||||
return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
|
||||
}
|
||||
|
||||
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
|
||||
{
|
||||
return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
|
||||
}
|
||||
|
||||
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
|
||||
{
|
||||
e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
|
||||
|
||||
BUG_ON(!dm_consecutive_chunk_count(e));
|
||||
}
|
||||
|
||||
# else
|
||||
# define DM_CHUNK_CONSECUTIVE_BITS 0
|
||||
|
||||
static inline chunk_t dm_chunk_number(chunk_t chunk)
|
||||
{
|
||||
return chunk;
|
||||
}
|
||||
|
||||
static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
|
||||
{
|
||||
}
|
||||
|
||||
# endif
|
||||
|
||||
/*
|
||||
* Abstraction to handle the meta/layout of exception stores (the
|
||||
* COW device).
|
||||
*/
|
||||
struct exception_store {
|
||||
|
||||
/*
|
||||
* Destroys this object when you've finished with it.
|
||||
*/
|
||||
void (*destroy) (struct exception_store *store);
|
||||
|
||||
/*
|
||||
* The target shouldn't read the COW device until this is
|
||||
* called.
|
||||
*/
|
||||
int (*read_metadata) (struct exception_store *store);
|
||||
|
||||
/*
|
||||
* Find somewhere to store the next exception.
|
||||
*/
|
||||
int (*prepare_exception) (struct exception_store *store,
|
||||
struct dm_snap_exception *e);
|
||||
|
||||
/*
|
||||
* Update the metadata with this exception.
|
||||
*/
|
||||
void (*commit_exception) (struct exception_store *store,
|
||||
struct dm_snap_exception *e,
|
||||
void (*callback) (void *, int success),
|
||||
void *callback_context);
|
||||
|
||||
/*
|
||||
* The snapshot is invalid, note this in the metadata.
|
||||
*/
|
||||
void (*drop_snapshot) (struct exception_store *store);
|
||||
|
||||
/*
|
||||
* Return how full the snapshot is.
|
||||
*/
|
||||
void (*fraction_full) (struct exception_store *store,
|
||||
sector_t *numerator,
|
||||
sector_t *denominator);
|
||||
|
||||
struct dm_snapshot *snap;
|
||||
void *context;
|
||||
};
|
||||
|
||||
#define DM_TRACKED_CHUNK_HASH_SIZE 16
|
||||
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
|
||||
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
|
||||
|
@ -172,7 +61,7 @@ struct dm_snapshot {
|
|||
spinlock_t pe_lock;
|
||||
|
||||
/* The on disk metadata handler */
|
||||
struct exception_store store;
|
||||
struct dm_exception_store store;
|
||||
|
||||
struct dm_kcopyd_client *kcopyd_client;
|
||||
|
||||
|
@ -186,20 +75,6 @@ struct dm_snapshot {
|
|||
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
|
||||
};
|
||||
|
||||
/*
|
||||
* Used by the exception stores to load exceptions hen
|
||||
* initialising.
|
||||
*/
|
||||
int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
|
||||
|
||||
/*
|
||||
* Constructor and destructor for the default persistent
|
||||
* store.
|
||||
*/
|
||||
int dm_create_persistent(struct exception_store *store);
|
||||
|
||||
int dm_create_transient(struct exception_store *store);
|
||||
|
||||
/*
|
||||
* Return the number of sectors in the device.
|
||||
*/
|
||||
|
|
|
@ -337,9 +337,7 @@ int __init dm_stripe_init(void)
|
|||
|
||||
void dm_stripe_exit(void)
|
||||
{
|
||||
if (dm_unregister_target(&stripe_target))
|
||||
DMWARN("target unregistration failed");
|
||||
|
||||
dm_unregister_target(&stripe_target);
|
||||
destroy_workqueue(kstriped);
|
||||
|
||||
return;
|
||||
|
|
99
drivers/md/dm-sysfs.c
Normal file
99
drivers/md/dm-sysfs.c
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/dm-ioctl.h>
|
||||
#include "dm.h"
|
||||
|
||||
struct dm_sysfs_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct mapped_device *, char *);
|
||||
ssize_t (*store)(struct mapped_device *, char *);
|
||||
};
|
||||
|
||||
#define DM_ATTR_RO(_name) \
|
||||
struct dm_sysfs_attr dm_attr_##_name = \
|
||||
__ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL)
|
||||
|
||||
static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct dm_sysfs_attr *dm_attr;
|
||||
struct mapped_device *md;
|
||||
ssize_t ret;
|
||||
|
||||
dm_attr = container_of(attr, struct dm_sysfs_attr, attr);
|
||||
if (!dm_attr->show)
|
||||
return -EIO;
|
||||
|
||||
md = dm_get_from_kobject(kobj);
|
||||
if (!md)
|
||||
return -EINVAL;
|
||||
|
||||
ret = dm_attr->show(md, page);
|
||||
dm_put(md);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf)
|
||||
{
|
||||
if (dm_copy_name_and_uuid(md, buf, NULL))
|
||||
return -EIO;
|
||||
|
||||
strcat(buf, "\n");
|
||||
return strlen(buf);
|
||||
}
|
||||
|
||||
static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
|
||||
{
|
||||
if (dm_copy_name_and_uuid(md, NULL, buf))
|
||||
return -EIO;
|
||||
|
||||
strcat(buf, "\n");
|
||||
return strlen(buf);
|
||||
}
|
||||
|
||||
static DM_ATTR_RO(name);
|
||||
static DM_ATTR_RO(uuid);
|
||||
|
||||
static struct attribute *dm_attrs[] = {
|
||||
&dm_attr_name.attr,
|
||||
&dm_attr_uuid.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct sysfs_ops dm_sysfs_ops = {
|
||||
.show = dm_attr_show,
|
||||
};
|
||||
|
||||
/*
|
||||
* dm kobject is embedded in mapped_device structure
|
||||
* no need to define release function here
|
||||
*/
|
||||
static struct kobj_type dm_ktype = {
|
||||
.sysfs_ops = &dm_sysfs_ops,
|
||||
.default_attrs = dm_attrs,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize kobj
|
||||
* because nobody using md yet, no need to call explicit dm_get/put
|
||||
*/
|
||||
int dm_sysfs_init(struct mapped_device *md)
|
||||
{
|
||||
return kobject_init_and_add(dm_kobject(md), &dm_ktype,
|
||||
&disk_to_dev(dm_disk(md))->kobj,
|
||||
"%s", "dm");
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove kobj, called after all references removed
|
||||
*/
|
||||
void dm_sysfs_exit(struct mapped_device *md)
|
||||
{
|
||||
kobject_put(dm_kobject(md));
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2001 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
@ -15,6 +15,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/delay.h>
|
||||
#include <asm/atomic.h>
|
||||
|
||||
#define DM_MSG_PREFIX "table"
|
||||
|
@ -24,6 +25,19 @@
|
|||
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
|
||||
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
|
||||
|
||||
/*
|
||||
* The table has always exactly one reference from either mapped_device->map
|
||||
* or hash_cell->new_map. This reference is not counted in table->holders.
|
||||
* A pair of dm_create_table/dm_destroy_table functions is used for table
|
||||
* creation/destruction.
|
||||
*
|
||||
* Temporary references from the other code increase table->holders. A pair
|
||||
* of dm_table_get/dm_table_put functions is used to manipulate it.
|
||||
*
|
||||
* When the table is about to be destroyed, we wait for table->holders to
|
||||
* drop to zero.
|
||||
*/
|
||||
|
||||
struct dm_table {
|
||||
struct mapped_device *md;
|
||||
atomic_t holders;
|
||||
|
@ -38,6 +52,8 @@ struct dm_table {
|
|||
sector_t *highs;
|
||||
struct dm_target *targets;
|
||||
|
||||
unsigned barriers_supported:1;
|
||||
|
||||
/*
|
||||
* Indicates the rw permissions for the new logical
|
||||
* device. This should be a combination of FMODE_READ
|
||||
|
@ -226,7 +242,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
|
|||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&t->devices);
|
||||
atomic_set(&t->holders, 1);
|
||||
atomic_set(&t->holders, 0);
|
||||
t->barriers_supported = 1;
|
||||
|
||||
if (!num_targets)
|
||||
num_targets = KEYS_PER_NODE;
|
||||
|
@ -256,10 +273,14 @@ static void free_devices(struct list_head *devices)
|
|||
}
|
||||
}
|
||||
|
||||
static void table_destroy(struct dm_table *t)
|
||||
void dm_table_destroy(struct dm_table *t)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
while (atomic_read(&t->holders))
|
||||
msleep(1);
|
||||
smp_mb();
|
||||
|
||||
/* free the indexes (see dm_table_complete) */
|
||||
if (t->depth >= 2)
|
||||
vfree(t->index[t->depth - 2]);
|
||||
|
@ -297,8 +318,8 @@ void dm_table_put(struct dm_table *t)
|
|||
if (!t)
|
||||
return;
|
||||
|
||||
if (atomic_dec_and_test(&t->holders))
|
||||
table_destroy(t);
|
||||
smp_mb__before_atomic_dec();
|
||||
atomic_dec(&t->holders);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -728,6 +749,10 @@ int dm_table_add_target(struct dm_table *t, const char *type,
|
|||
/* FIXME: the plan is to combine high here and then have
|
||||
* the merge fn apply the target level restrictions. */
|
||||
combine_restrictions_low(&t->limits, &tgt->limits);
|
||||
|
||||
if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS))
|
||||
t->barriers_supported = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
bad:
|
||||
|
@ -772,6 +797,12 @@ int dm_table_complete(struct dm_table *t)
|
|||
|
||||
check_for_valid_limits(&t->limits);
|
||||
|
||||
/*
|
||||
* We only support barriers if there is exactly one underlying device.
|
||||
*/
|
||||
if (!list_is_singular(&t->devices))
|
||||
t->barriers_supported = 0;
|
||||
|
||||
/* how many indexes will the btree have ? */
|
||||
leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
|
||||
t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
|
||||
|
@ -986,6 +1017,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
|
|||
return t->md;
|
||||
}
|
||||
|
||||
int dm_table_barrier_ok(struct dm_table *t)
|
||||
{
|
||||
return t->barriers_supported;
|
||||
}
|
||||
EXPORT_SYMBOL(dm_table_barrier_ok);
|
||||
|
||||
EXPORT_SYMBOL(dm_vcalloc);
|
||||
EXPORT_SYMBOL(dm_get_device);
|
||||
EXPORT_SYMBOL(dm_put_device);
|
||||
|
|
|
@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t)
|
|||
return rv;
|
||||
}
|
||||
|
||||
int dm_unregister_target(struct target_type *t)
|
||||
void dm_unregister_target(struct target_type *t)
|
||||
{
|
||||
struct tt_internal *ti;
|
||||
|
||||
down_write(&_lock);
|
||||
if (!(ti = __find_target_type(t->name))) {
|
||||
up_write(&_lock);
|
||||
return -EINVAL;
|
||||
DMCRIT("Unregistering unrecognised target: %s", t->name);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (ti->use) {
|
||||
up_write(&_lock);
|
||||
return -ETXTBSY;
|
||||
DMCRIT("Attempt to unregister target still in use: %s",
|
||||
t->name);
|
||||
BUG();
|
||||
}
|
||||
|
||||
list_del(&ti->list);
|
||||
kfree(ti);
|
||||
|
||||
up_write(&_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -187,8 +187,7 @@ int __init dm_target_init(void)
|
|||
|
||||
void dm_target_exit(void)
|
||||
{
|
||||
if (dm_unregister_target(&error_target))
|
||||
DMWARN("error target unregistration failed");
|
||||
dm_unregister_target(&error_target);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(dm_register_target);
|
||||
|
|
|
@ -69,10 +69,7 @@ static int __init dm_zero_init(void)
|
|||
|
||||
static void __exit dm_zero_exit(void)
|
||||
{
|
||||
int r = dm_unregister_target(&zero_target);
|
||||
|
||||
if (r < 0)
|
||||
DMERR("unregister failed %d", r);
|
||||
dm_unregister_target(&zero_target);
|
||||
}
|
||||
|
||||
module_init(dm_zero_init)
|
||||
|
|
103
drivers/md/dm.c
103
drivers/md/dm.c
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
@ -32,6 +32,7 @@ static unsigned int _major = 0;
|
|||
|
||||
static DEFINE_SPINLOCK(_minor_lock);
|
||||
/*
|
||||
* For bio-based dm.
|
||||
* One of these is allocated per bio.
|
||||
*/
|
||||
struct dm_io {
|
||||
|
@ -43,6 +44,7 @@ struct dm_io {
|
|||
};
|
||||
|
||||
/*
|
||||
* For bio-based dm.
|
||||
* One of these is allocated per target within a bio. Hopefully
|
||||
* this will be simplified out one day.
|
||||
*/
|
||||
|
@ -54,6 +56,27 @@ struct dm_target_io {
|
|||
|
||||
DEFINE_TRACE(block_bio_complete);
|
||||
|
||||
/*
|
||||
* For request-based dm.
|
||||
* One of these is allocated per request.
|
||||
*/
|
||||
struct dm_rq_target_io {
|
||||
struct mapped_device *md;
|
||||
struct dm_target *ti;
|
||||
struct request *orig, clone;
|
||||
int error;
|
||||
union map_info info;
|
||||
};
|
||||
|
||||
/*
|
||||
* For request-based dm.
|
||||
* One of these is allocated per bio.
|
||||
*/
|
||||
struct dm_rq_clone_bio_info {
|
||||
struct bio *orig;
|
||||
struct request *rq;
|
||||
};
|
||||
|
||||
union map_info *dm_get_mapinfo(struct bio *bio)
|
||||
{
|
||||
if (bio && bio->bi_private)
|
||||
|
@ -144,11 +167,16 @@ struct mapped_device {
|
|||
|
||||
/* forced geometry settings */
|
||||
struct hd_geometry geometry;
|
||||
|
||||
/* sysfs handle */
|
||||
struct kobject kobj;
|
||||
};
|
||||
|
||||
#define MIN_IOS 256
|
||||
static struct kmem_cache *_io_cache;
|
||||
static struct kmem_cache *_tio_cache;
|
||||
static struct kmem_cache *_rq_tio_cache;
|
||||
static struct kmem_cache *_rq_bio_info_cache;
|
||||
|
||||
static int __init local_init(void)
|
||||
{
|
||||
|
@ -164,9 +192,17 @@ static int __init local_init(void)
|
|||
if (!_tio_cache)
|
||||
goto out_free_io_cache;
|
||||
|
||||
_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
|
||||
if (!_rq_tio_cache)
|
||||
goto out_free_tio_cache;
|
||||
|
||||
_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
|
||||
if (!_rq_bio_info_cache)
|
||||
goto out_free_rq_tio_cache;
|
||||
|
||||
r = dm_uevent_init();
|
||||
if (r)
|
||||
goto out_free_tio_cache;
|
||||
goto out_free_rq_bio_info_cache;
|
||||
|
||||
_major = major;
|
||||
r = register_blkdev(_major, _name);
|
||||
|
@ -180,6 +216,10 @@ static int __init local_init(void)
|
|||
|
||||
out_uevent_exit:
|
||||
dm_uevent_exit();
|
||||
out_free_rq_bio_info_cache:
|
||||
kmem_cache_destroy(_rq_bio_info_cache);
|
||||
out_free_rq_tio_cache:
|
||||
kmem_cache_destroy(_rq_tio_cache);
|
||||
out_free_tio_cache:
|
||||
kmem_cache_destroy(_tio_cache);
|
||||
out_free_io_cache:
|
||||
|
@ -190,6 +230,8 @@ static int __init local_init(void)
|
|||
|
||||
static void local_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(_rq_bio_info_cache);
|
||||
kmem_cache_destroy(_rq_tio_cache);
|
||||
kmem_cache_destroy(_tio_cache);
|
||||
kmem_cache_destroy(_io_cache);
|
||||
unregister_blkdev(_major, _name);
|
||||
|
@ -796,7 +838,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
|
|||
ci.map = dm_get_table(md);
|
||||
if (unlikely(!ci.map))
|
||||
return -EIO;
|
||||
|
||||
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
|
||||
dm_table_put(ci.map);
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
ci.md = md;
|
||||
ci.bio = bio;
|
||||
ci.io = alloc_io(md);
|
||||
|
@ -880,15 +926,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
|
|||
struct mapped_device *md = q->queuedata;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* There is no use in forwarding any barrier request since we can't
|
||||
* guarantee it is (or can be) handled by the targets correctly.
|
||||
*/
|
||||
if (unlikely(bio_barrier(bio))) {
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
down_read(&md->io_lock);
|
||||
|
||||
cpu = part_stat_lock();
|
||||
|
@ -943,8 +980,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
|
|||
struct mapped_device *md = congested_data;
|
||||
struct dm_table *map;
|
||||
|
||||
atomic_inc(&md->pending);
|
||||
|
||||
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
|
||||
map = dm_get_table(md);
|
||||
if (map) {
|
||||
|
@ -953,10 +988,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
|
|||
}
|
||||
}
|
||||
|
||||
if (!atomic_dec_return(&md->pending))
|
||||
/* nudge anyone waiting on suspend queue */
|
||||
wake_up(&md->wait);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -1216,10 +1247,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
|
|||
|
||||
if (md->suspended_bdev)
|
||||
__set_size(md, size);
|
||||
if (size == 0)
|
||||
return 0;
|
||||
|
||||
dm_table_get(t);
|
||||
if (!size) {
|
||||
dm_table_destroy(t);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dm_table_event_callback(t, event_callback, md);
|
||||
|
||||
write_lock(&md->map_lock);
|
||||
|
@ -1241,7 +1274,7 @@ static void __unbind(struct mapped_device *md)
|
|||
write_lock(&md->map_lock);
|
||||
md->map = NULL;
|
||||
write_unlock(&md->map_lock);
|
||||
dm_table_put(map);
|
||||
dm_table_destroy(map);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1255,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result)
|
|||
if (!md)
|
||||
return -ENXIO;
|
||||
|
||||
dm_sysfs_init(md);
|
||||
|
||||
*result = md;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1330,8 +1365,9 @@ void dm_put(struct mapped_device *md)
|
|||
dm_table_presuspend_targets(map);
|
||||
dm_table_postsuspend_targets(map);
|
||||
}
|
||||
__unbind(md);
|
||||
dm_sysfs_exit(md);
|
||||
dm_table_put(map);
|
||||
__unbind(md);
|
||||
free_dev(md);
|
||||
}
|
||||
}
|
||||
|
@ -1669,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md)
|
|||
return md->disk;
|
||||
}
|
||||
|
||||
struct kobject *dm_kobject(struct mapped_device *md)
|
||||
{
|
||||
return &md->kobj;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct mapped_device should not be exported outside of dm.c
|
||||
* so use this check to verify that kobj is part of md structure
|
||||
*/
|
||||
struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
|
||||
{
|
||||
struct mapped_device *md;
|
||||
|
||||
md = container_of(kobj, struct mapped_device, kobj);
|
||||
if (&md->kobj != kobj)
|
||||
return NULL;
|
||||
|
||||
dm_get(md);
|
||||
return md;
|
||||
}
|
||||
|
||||
int dm_suspended(struct mapped_device *md)
|
||||
{
|
||||
return test_bit(DMF_SUSPENDED, &md->flags);
|
||||
|
|
|
@ -36,6 +36,7 @@ struct dm_table;
|
|||
/*-----------------------------------------------------------------
|
||||
* Internal table functions.
|
||||
*---------------------------------------------------------------*/
|
||||
void dm_table_destroy(struct dm_table *t);
|
||||
void dm_table_event_callback(struct dm_table *t,
|
||||
void (*fn)(void *), void *context);
|
||||
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
|
||||
|
@ -51,6 +52,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
|
|||
* To check the return value from dm_table_find_target().
|
||||
*/
|
||||
#define dm_target_is_valid(t) ((t)->table)
|
||||
int dm_table_barrier_ok(struct dm_table *t);
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* A registry of target types.
|
||||
|
@ -71,6 +73,14 @@ int dm_split_args(int *argc, char ***argvp, char *input);
|
|||
int dm_interface_init(void);
|
||||
void dm_interface_exit(void);
|
||||
|
||||
/*
|
||||
* sysfs interface
|
||||
*/
|
||||
int dm_sysfs_init(struct mapped_device *md);
|
||||
void dm_sysfs_exit(struct mapped_device *md);
|
||||
struct kobject *dm_kobject(struct mapped_device *md);
|
||||
struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
|
||||
|
||||
/*
|
||||
* Targets for linear and striped mappings
|
||||
*/
|
||||
|
|
|
@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
|
|||
*/
|
||||
typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
|
||||
union map_info *map_context);
|
||||
typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
|
||||
union map_info *map_context);
|
||||
|
||||
/*
|
||||
* Returns:
|
||||
|
@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
|
|||
typedef int (*dm_endio_fn) (struct dm_target *ti,
|
||||
struct bio *bio, int error,
|
||||
union map_info *map_context);
|
||||
typedef int (*dm_request_endio_fn) (struct dm_target *ti,
|
||||
struct request *clone, int error,
|
||||
union map_info *map_context);
|
||||
|
||||
typedef void (*dm_flush_fn) (struct dm_target *ti);
|
||||
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
|
||||
|
@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
|
|||
typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
|
||||
struct bio_vec *biovec, int max_size);
|
||||
|
||||
/*
|
||||
* Returns:
|
||||
* 0: The target can handle the next I/O immediately.
|
||||
* 1: The target can't handle the next I/O immediately.
|
||||
*/
|
||||
typedef int (*dm_busy_fn) (struct dm_target *ti);
|
||||
|
||||
void dm_error(const char *message);
|
||||
|
||||
/*
|
||||
|
@ -100,14 +112,23 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
|
|||
/*
|
||||
* Information about a target type
|
||||
*/
|
||||
|
||||
/*
|
||||
* Target features
|
||||
*/
|
||||
#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001
|
||||
|
||||
struct target_type {
|
||||
uint64_t features;
|
||||
const char *name;
|
||||
struct module *module;
|
||||
unsigned version[3];
|
||||
dm_ctr_fn ctr;
|
||||
dm_dtr_fn dtr;
|
||||
dm_map_fn map;
|
||||
dm_map_request_fn map_rq;
|
||||
dm_endio_fn end_io;
|
||||
dm_request_endio_fn rq_end_io;
|
||||
dm_flush_fn flush;
|
||||
dm_presuspend_fn presuspend;
|
||||
dm_postsuspend_fn postsuspend;
|
||||
|
@ -117,6 +138,7 @@ struct target_type {
|
|||
dm_message_fn message;
|
||||
dm_ioctl_fn ioctl;
|
||||
dm_merge_fn merge;
|
||||
dm_busy_fn busy;
|
||||
};
|
||||
|
||||
struct io_restrictions {
|
||||
|
@ -157,8 +179,7 @@ struct dm_target {
|
|||
};
|
||||
|
||||
int dm_register_target(struct target_type *t);
|
||||
int dm_unregister_target(struct target_type *t);
|
||||
|
||||
void dm_unregister_target(struct target_type *t);
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Functions for creating and manipulating mapped devices.
|
||||
|
@ -276,6 +297,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
|
|||
*---------------------------------------------------------------*/
|
||||
#define DM_NAME "device-mapper"
|
||||
|
||||
#define DMCRIT(f, arg...) \
|
||||
printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
|
||||
|
||||
#define DMERR(f, arg...) \
|
||||
printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
|
||||
#define DMERR_LIMIT(f, arg...) \
|
||||
|
|
Loading…
Reference in a new issue