GFS2: dlm based recovery coordination
This new method of managing recovery is an alternative to the previous approach of using the userland gfs_controld.

- use dlm slot numbers to assign journal ids
- use dlm recovery callbacks to initiate journal recovery
- use a dlm lock to determine the first node to mount fs
- use a dlm lock to track journals that need recovery

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
parent
e343a895a9
commit
e0c2a9aa1e
10 changed files with 1098 additions and 42 deletions
|
@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
|
|||
spin_lock(&gl->gl_spin);
|
||||
gl->gl_reply = ret;
|
||||
|
||||
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
|
||||
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
|
||||
if (gfs2_should_freeze(gl)) {
|
||||
set_bit(GLF_FROZEN, &gl->gl_flags);
|
||||
spin_unlock(&gl->gl_spin);
|
||||
|
|
|
@ -121,8 +121,11 @@ enum {
|
|||
|
||||
struct lm_lockops {
|
||||
const char *lm_proto_name;
|
||||
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
|
||||
void (*lm_unmount) (struct gfs2_sbd *sdp);
|
||||
int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
|
||||
void (*lm_first_done) (struct gfs2_sbd *sdp);
|
||||
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
|
||||
unsigned int result);
|
||||
void (*lm_unmount) (struct gfs2_sbd *sdp);
|
||||
void (*lm_withdraw) (struct gfs2_sbd *sdp);
|
||||
void (*lm_put_lock) (struct gfs2_glock *gl);
|
||||
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
|
||||
|
|
|
@ -139,8 +139,45 @@ struct gfs2_bufdata {
|
|||
#define GDLM_STRNAME_BYTES 25
|
||||
#define GDLM_LVB_SIZE 32
|
||||
|
||||
/*
|
||||
* ls_recover_flags:
|
||||
*
|
||||
* DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
|
||||
* held by failed nodes whose journals need recovery. Those locks should
|
||||
* only be used for journal recovery until the journal recovery is done.
|
||||
* This is set by the dlm recover_prep callback and cleared by the
|
||||
* gfs2_control thread when journal recovery is complete. To avoid
|
||||
* races between recover_prep setting and gfs2_control clearing, recover_spin
|
||||
* is held while changing this bit and reading/writing recover_block
|
||||
* and recover_start.
|
||||
*
|
||||
* DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
|
||||
*
|
||||
* DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
|
||||
* recovery of all journals before allowing other nodes to mount the fs.
|
||||
* This is cleared when FIRST_MOUNT_DONE is set.
|
||||
*
|
||||
* DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
|
||||
* recovery of all journals, and now allows other nodes to mount the fs.
|
||||
*
|
||||
* DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
|
||||
* BLOCK_LOCKS for the first time. The gfs2_control thread should now
|
||||
* control clearing BLOCK_LOCKS for further recoveries.
|
||||
*
|
||||
* DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
|
||||
*
|
||||
* DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
|
||||
* and recover_done(), i.e. set while recover_block == recover_start.
|
||||
*/
|
||||
|
||||
enum {
|
||||
DFL_BLOCK_LOCKS = 0,
|
||||
DFL_NO_DLM_OPS = 1,
|
||||
DFL_FIRST_MOUNT = 2,
|
||||
DFL_FIRST_MOUNT_DONE = 3,
|
||||
DFL_MOUNT_DONE = 4,
|
||||
DFL_UNMOUNT = 5,
|
||||
DFL_DLM_RECOVERY = 6,
|
||||
};
|
||||
|
||||
struct lm_lockname {
|
||||
|
@ -499,14 +536,26 @@ struct gfs2_sb_host {
|
|||
struct lm_lockstruct {
|
||||
int ls_jid;
|
||||
unsigned int ls_first;
|
||||
unsigned int ls_first_done;
|
||||
unsigned int ls_nodir;
|
||||
const struct lm_lockops *ls_ops;
|
||||
unsigned long ls_flags;
|
||||
dlm_lockspace_t *ls_dlm;
|
||||
|
||||
int ls_recover_jid_done;
|
||||
int ls_recover_jid_status;
|
||||
int ls_recover_jid_done; /* These two are deprecated, */
|
||||
int ls_recover_jid_status; /* used previously by gfs_controld */
|
||||
|
||||
struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
|
||||
struct dlm_lksb ls_control_lksb; /* control_lock */
|
||||
char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
|
||||
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
|
||||
|
||||
spinlock_t ls_recover_spin; /* protects following fields */
|
||||
unsigned long ls_recover_flags; /* DFL_ */
|
||||
uint32_t ls_recover_mount; /* gen in first recover_done cb */
|
||||
uint32_t ls_recover_start; /* gen in last recover_done cb */
|
||||
uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
|
||||
uint32_t ls_recover_size; /* size of recover_submit, recover_result */
|
||||
uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
|
||||
uint32_t *ls_recover_result; /* result of last jid recovery */
|
||||
};
|
||||
|
||||
struct gfs2_sbd {
|
||||
|
@ -544,6 +593,7 @@ struct gfs2_sbd {
|
|||
wait_queue_head_t sd_glock_wait;
|
||||
atomic_t sd_glock_disposal;
|
||||
struct completion sd_locking_init;
|
||||
struct delayed_work sd_control_work;
|
||||
|
||||
/* Inode Stuff */
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -28,6 +28,8 @@
|
|||
#include "recovery.h"
|
||||
#include "dir.h"
|
||||
|
||||
struct workqueue_struct *gfs2_control_wq;
|
||||
|
||||
static struct shrinker qd_shrinker = {
|
||||
.shrink = gfs2_shrink_qd_memory,
|
||||
.seeks = DEFAULT_SEEKS,
|
||||
|
@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void)
|
|||
if (!gfs_recovery_wq)
|
||||
goto fail_wq;
|
||||
|
||||
gfs2_control_wq = alloc_workqueue("gfs2_control",
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
|
||||
if (!gfs2_control_wq)
|
||||
goto fail_control;
|
||||
|
||||
gfs2_register_debugfs();
|
||||
|
||||
printk("GFS2 installed\n");
|
||||
|
||||
return 0;
|
||||
|
||||
fail_control:
|
||||
destroy_workqueue(gfs_recovery_wq);
|
||||
fail_wq:
|
||||
unregister_filesystem(&gfs2meta_fs_type);
|
||||
fail_unregister:
|
||||
|
@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
|
|||
unregister_filesystem(&gfs2_fs_type);
|
||||
unregister_filesystem(&gfs2meta_fs_type);
|
||||
destroy_workqueue(gfs_recovery_wq);
|
||||
destroy_workqueue(gfs2_control_wq);
|
||||
|
||||
rcu_barrier();
|
||||
|
||||
|
|
|
@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
|
|||
{
|
||||
char *message = "FIRSTMOUNT=Done";
|
||||
char *envp[] = { message, NULL };
|
||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
||||
ls->ls_first_done = 1;
|
||||
|
||||
fs_info(sdp, "first mount done, others may mount\n");
|
||||
|
||||
if (sdp->sd_lockstruct.ls_ops->lm_first_done)
|
||||
sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
|
||||
|
||||
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
||||
}
|
||||
|
||||
|
@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
|
|||
struct gfs2_args *args = &sdp->sd_args;
|
||||
const char *proto = sdp->sd_proto_name;
|
||||
const char *table = sdp->sd_table_name;
|
||||
const char *fsname;
|
||||
char *o, *options;
|
||||
int ret;
|
||||
|
||||
|
@ -1004,21 +1007,12 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
|
|||
}
|
||||
}
|
||||
|
||||
if (sdp->sd_args.ar_spectator)
|
||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
|
||||
else
|
||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
|
||||
sdp->sd_lockstruct.ls_jid);
|
||||
|
||||
fsname = strchr(table, ':');
|
||||
if (fsname)
|
||||
fsname++;
|
||||
if (lm->lm_mount == NULL) {
|
||||
fs_info(sdp, "Now mounting FS...\n");
|
||||
complete_all(&sdp->sd_locking_init);
|
||||
return 0;
|
||||
}
|
||||
ret = lm->lm_mount(sdp, fsname);
|
||||
ret = lm->lm_mount(sdp, table);
|
||||
if (ret == 0)
|
||||
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
|
||||
complete_all(&sdp->sd_locking_init);
|
||||
|
@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
|
|||
if (error)
|
||||
goto fail;
|
||||
|
||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
|
||||
|
||||
gfs2_create_debugfs_file(sdp);
|
||||
|
||||
error = gfs2_sys_fs_add(sdp);
|
||||
|
@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
|
|||
goto fail_sb;
|
||||
}
|
||||
|
||||
if (sdp->sd_args.ar_spectator)
|
||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
|
||||
sdp->sd_table_name);
|
||||
else
|
||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
|
||||
sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
|
||||
|
||||
error = init_inodes(sdp, DO);
|
||||
if (error)
|
||||
goto fail_sb;
|
||||
|
|
|
@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
|
|||
char env_status[20];
|
||||
char *envp[] = { env_jid, env_status, NULL };
|
||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
||||
|
||||
ls->ls_recover_jid_done = jid;
|
||||
ls->ls_recover_jid_status = message;
|
||||
sprintf(env_jid, "JID=%d", jid);
|
||||
sprintf(env_status, "RECOVERY=%s",
|
||||
message == LM_RD_SUCCESS ? "Done" : "Failed");
|
||||
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
||||
|
||||
if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
|
||||
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
|
||||
}
|
||||
|
||||
void gfs2_recover_func(struct work_struct *work)
|
||||
|
|
|
@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
|
|||
ssize_t ret;
|
||||
int val = 0;
|
||||
|
||||
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
|
||||
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
|
||||
val = 1;
|
||||
ret = sprintf(buf, "%d\n", val);
|
||||
return ret;
|
||||
|
@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||
val = simple_strtol(buf, NULL, 0);
|
||||
|
||||
if (val == 1)
|
||||
set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
|
||||
set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
|
||||
else if (val == 0) {
|
||||
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
|
||||
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
|
||||
smp_mb__after_clear_bit();
|
||||
gfs2_glock_thaw(sdp);
|
||||
} else {
|
||||
|
@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||
goto out;
|
||||
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
|
||||
goto out;
|
||||
sdp->sd_lockstruct.ls_first = first;
|
||||
rv = 0;
|
||||
sdp->sd_lockstruct.ls_first = first;
|
||||
rv = 0;
|
||||
out:
|
||||
spin_unlock(&sdp->sd_jindex_spin);
|
||||
return rv ? rv : len;
|
||||
|
@ -360,19 +360,14 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
|
||||
{
|
||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
||||
return sprintf(buf, "%d\n", ls->ls_first_done);
|
||||
return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
|
||||
}
|
||||
|
||||
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
||||
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
|
||||
{
|
||||
unsigned jid;
|
||||
struct gfs2_jdesc *jd;
|
||||
int rv;
|
||||
|
||||
rv = sscanf(buf, "%u", &jid);
|
||||
if (rv != 1)
|
||||
return -EINVAL;
|
||||
|
||||
rv = -ESHUTDOWN;
|
||||
spin_lock(&sdp->sd_jindex_spin);
|
||||
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
|
||||
|
@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||
}
|
||||
out:
|
||||
spin_unlock(&sdp->sd_jindex_spin);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
||||
{
|
||||
unsigned jid;
|
||||
int rv;
|
||||
|
||||
rv = sscanf(buf, "%u", &jid);
|
||||
if (rv != 1)
|
||||
return -EINVAL;
|
||||
|
||||
rv = gfs2_recover_set(sdp, jid);
|
||||
|
||||
return rv ? rv : len;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
|
|||
int gfs2_sys_init(void);
|
||||
void gfs2_sys_uninit(void);
|
||||
|
||||
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
|
||||
|
||||
#endif /* __SYS_DOT_H__ */
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#define GFS2_LIVE_LOCK 1
|
||||
#define GFS2_TRANS_LOCK 2
|
||||
#define GFS2_RENAME_LOCK 3
|
||||
#define GFS2_CONTROL_LOCK 4
|
||||
#define GFS2_MOUNTED_LOCK 5
|
||||
|
||||
/* Format numbers for various metadata types */
|
||||
|
||||
|
|
Loading…
Reference in a new issue