dlm: fix unlock balance warnings
The in_recovery rw_semaphore has always been acquired and released by different threads by design. To work around the "BUG: bad unlock balance detected!" messages, adjust things so the dlm_recoverd thread always does both down_write and up_write. Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
parent
6ad2291624
commit
475f230c60
6 changed files with 78 additions and 30 deletions
|
@ -604,6 +604,7 @@ struct dlm_ls {
|
|||
struct idr ls_recover_idr;
|
||||
spinlock_t ls_recover_idr_lock;
|
||||
wait_queue_head_t ls_wait_general;
|
||||
wait_queue_head_t ls_recover_lock_wait;
|
||||
struct mutex ls_clear_proc_locks;
|
||||
|
||||
struct list_head ls_root_list; /* root resources */
|
||||
|
@ -616,15 +617,40 @@ struct dlm_ls {
|
|||
char ls_name[1];
|
||||
};
|
||||
|
||||
#define LSFL_WORK 0
|
||||
#define LSFL_RUNNING 1
|
||||
#define LSFL_RECOVERY_STOP 2
|
||||
#define LSFL_RCOM_READY 3
|
||||
#define LSFL_RCOM_WAIT 4
|
||||
#define LSFL_UEVENT_WAIT 5
|
||||
#define LSFL_TIMEWARN 6
|
||||
#define LSFL_CB_DELAY 7
|
||||
#define LSFL_NODIR 8
|
||||
/*
|
||||
* LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
|
||||
* that they should abort what they're doing so new recovery can be started.
|
||||
*
|
||||
* LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
|
||||
* should do down_write() on the in_recovery rw_semaphore. (doing down_write
|
||||
* within dlm_ls_stop causes complaints about the lock acquired/released
|
||||
* in different contexts.)
|
||||
*
|
||||
* LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
|
||||
* It sets this after it is done with down_write() on the in_recovery
|
||||
* rw_semaphore and clears it after it has released the rw_semaphore.
|
||||
*
|
||||
* LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
|
||||
* should begin recovery of the lockspace.
|
||||
*
|
||||
* LSFL_RUNNING - set when normal locking activity is enabled.
|
||||
* dlm_ls_stop() clears this to tell dlm locking routines that they should
|
||||
* quit what they are doing so recovery can run. dlm_recoverd sets
|
||||
* this after recovery is finished.
|
||||
*/
|
||||
|
||||
#define LSFL_RECOVER_STOP 0
|
||||
#define LSFL_RECOVER_DOWN 1
|
||||
#define LSFL_RECOVER_LOCK 2
|
||||
#define LSFL_RECOVER_WORK 3
|
||||
#define LSFL_RUNNING 4
|
||||
|
||||
#define LSFL_RCOM_READY 5
|
||||
#define LSFL_RCOM_WAIT 6
|
||||
#define LSFL_UEVENT_WAIT 7
|
||||
#define LSFL_TIMEWARN 8
|
||||
#define LSFL_CB_DELAY 9
|
||||
#define LSFL_NODIR 10
|
||||
|
||||
/* much of this is just saving user space pointers associated with the
|
||||
lock that we pass back to the user lib with an ast */
|
||||
|
@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
|
|||
|
||||
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
|
||||
{
|
||||
return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
||||
return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||
}
|
||||
|
||||
static inline int dlm_no_directory(struct dlm_ls *ls)
|
||||
|
|
|
@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
|
|||
INIT_LIST_HEAD(&ls->ls_root_list);
|
||||
init_rwsem(&ls->ls_root_sem);
|
||||
|
||||
down_write(&ls->ls_in_recovery);
|
||||
|
||||
spin_lock(&lslist_lock);
|
||||
ls->ls_create_count = 1;
|
||||
list_add(&ls->ls_list, &lslist);
|
||||
|
@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
|
|||
}
|
||||
}
|
||||
|
||||
/* needs to find ls in lslist */
|
||||
init_waitqueue_head(&ls->ls_recover_lock_wait);
|
||||
|
||||
/*
|
||||
* Once started, dlm_recoverd first looks for ls in lslist, then
|
||||
* initializes ls_in_recovery as locked in "down" mode. We need
|
||||
* to wait for the wakeup from dlm_recoverd because in_recovery
|
||||
* has to start out in down mode.
|
||||
*/
|
||||
|
||||
error = dlm_recoverd_start(ls);
|
||||
if (error) {
|
||||
log_error(ls, "can't start dlm_recoverd %d", error);
|
||||
goto out_callback;
|
||||
}
|
||||
|
||||
wait_event(ls->ls_recover_lock_wait,
|
||||
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
|
||||
|
||||
ls->ls_kobj.kset = dlm_kset;
|
||||
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
|
||||
"%s", ls->ls_name);
|
||||
|
|
|
@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
|
|||
down_write(&ls->ls_recv_active);
|
||||
|
||||
/*
|
||||
* Abort any recovery that's in progress (see RECOVERY_STOP,
|
||||
* Abort any recovery that's in progress (see RECOVER_STOP,
|
||||
* dlm_recovery_stopped()) and tell any other threads running in the
|
||||
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
|
||||
*/
|
||||
|
||||
spin_lock(&ls->ls_recover_lock);
|
||||
set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
||||
set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
|
||||
ls->ls_recover_seq++;
|
||||
spin_unlock(&ls->ls_recover_lock);
|
||||
|
@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
|
|||
* when recovery is complete.
|
||||
*/
|
||||
|
||||
if (new)
|
||||
down_write(&ls->ls_in_recovery);
|
||||
if (new) {
|
||||
set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
|
||||
wake_up_process(ls->ls_recoverd_task);
|
||||
wait_event(ls->ls_recover_lock_wait,
|
||||
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
|
||||
}
|
||||
|
||||
/*
|
||||
* The recoverd suspend/resume makes sure that dlm_recoverd (if
|
||||
* running) has noticed RECOVERY_STOP above and quit processing the
|
||||
* running) has noticed RECOVER_STOP above and quit processing the
|
||||
* previous recovery.
|
||||
*/
|
||||
|
||||
|
@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
|
|||
kfree(rv_old);
|
||||
}
|
||||
|
||||
dlm_recoverd_kick(ls);
|
||||
set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
|
||||
wake_up_process(ls->ls_recoverd_task);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
|
|
|
@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
|
|||
|
||||
spin_lock(&ls->ls_recover_lock);
|
||||
status = ls->ls_recover_status;
|
||||
stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
||||
stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||
seq = ls->ls_recover_seq;
|
||||
spin_unlock(&ls->ls_recover_lock);
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
|
|||
set_bit(LSFL_RUNNING, &ls->ls_flags);
|
||||
/* unblocks processes waiting to enter the dlm */
|
||||
up_write(&ls->ls_in_recovery);
|
||||
clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||
error = 0;
|
||||
}
|
||||
spin_unlock(&ls->ls_recover_lock);
|
||||
|
@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
|
|||
rv = ls->ls_recover_args;
|
||||
ls->ls_recover_args = NULL;
|
||||
if (rv && ls->ls_recover_seq == rv->seq)
|
||||
clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
||||
clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||
spin_unlock(&ls->ls_recover_lock);
|
||||
|
||||
if (rv) {
|
||||
|
@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
|
|||
return -1;
|
||||
}
|
||||
|
||||
down_write(&ls->ls_in_recovery);
|
||||
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||
wake_up(&ls->ls_recover_lock_wait);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (!test_bit(LSFL_WORK, &ls->ls_flags))
|
||||
if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
|
||||
!test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
|
||||
schedule();
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
|
||||
if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
|
||||
down_write(&ls->ls_in_recovery);
|
||||
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||
wake_up(&ls->ls_recover_lock_wait);
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
|
||||
do_ls_recovery(ls);
|
||||
}
|
||||
|
||||
if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
|
||||
up_write(&ls->ls_in_recovery);
|
||||
|
||||
dlm_put_lockspace(ls);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dlm_recoverd_kick(struct dlm_ls *ls)
|
||||
{
|
||||
set_bit(LSFL_WORK, &ls->ls_flags);
|
||||
wake_up_process(ls->ls_recoverd_task);
|
||||
}
|
||||
|
||||
int dlm_recoverd_start(struct dlm_ls *ls)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
#ifndef __RECOVERD_DOT_H__
|
||||
#define __RECOVERD_DOT_H__
|
||||
|
||||
void dlm_recoverd_kick(struct dlm_ls *ls);
|
||||
void dlm_recoverd_stop(struct dlm_ls *ls);
|
||||
int dlm_recoverd_start(struct dlm_ls *ls);
|
||||
void dlm_recoverd_suspend(struct dlm_ls *ls);
|
||||
|
|
Loading…
Reference in a new issue