[PATCH] NLM: fix a client-side race on blocking locks.

If the lock blocks, the server may send us a GRANTED message that
races with the reply to our LOCK request. Make sure that we catch
the GRANTED by queueing up our request on the nlm_blocked list
before we send off the first LOCK RPC call.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
This commit is contained in:
Trond Myklebust 2005-06-22 17:16:31 +00:00
parent 4f15e2b1f4
commit ecdbf769b2
3 changed files with 96 additions and 50 deletions

View file

@ -41,24 +41,52 @@ struct nlm_wait {
static LIST_HEAD(nlm_blocked); static LIST_HEAD(nlm_blocked);
/*
 * Register a wait entry for a blocking lock request on the nlm_blocked
 * list, so that an incoming GRANTED message can be matched against it.
 *
 * The entry is heap-allocated, filled in with the host and lock, marked
 * NLM_LCK_BLOCKED and attached to req->a_block.  The request must not
 * already have an entry attached (BUG_ON otherwise).
 *
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
{
	struct nlm_wait *wait;

	BUG_ON(req->a_block != NULL);

	wait = kmalloc(sizeof(*wait), GFP_KERNEL);
	if (!wait)
		return -ENOMEM;

	init_waitqueue_head(&wait->b_wait);
	wait->b_status = NLM_LCK_BLOCKED;
	wait->b_lock = fl;
	wait->b_host = host;

	/* Put the entry on the list only once it is fully set up */
	list_add(&wait->b_list, &nlm_blocked);
	req->a_block = wait;
	return 0;
}
/*
 * Undo nlmclnt_prepare_block(): detach the wait entry from the request,
 * unlink it from the nlm_blocked list and free it.  A request with no
 * entry attached is left untouched.
 */
void nlmclnt_finish_block(struct nlm_rqst *req)
{
	struct nlm_wait *wait = req->a_block;

	if (wait != NULL) {
		req->a_block = NULL;
		list_del(&wait->b_list);
		kfree(wait);
	}
}
/* /*
* Block on a lock * Block on a lock
*/ */
int long nlmclnt_block(struct nlm_rqst *req, long timeout)
nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
{ {
struct nlm_wait block, **head; struct nlm_wait *block = req->a_block;
int err; long ret;
u32 pstate;
block.b_host = host; /* A borken server might ask us to block even if we didn't
block.b_lock = fl; * request it. Just say no!
init_waitqueue_head(&block.b_wait); */
block.b_status = NLM_LCK_BLOCKED; if (!req->a_args.block)
list_add(&block.b_list, &nlm_blocked); return -EAGAIN;
/* Remember pseudo nsm state */
pstate = host->h_state;
/* Go to sleep waiting for GRANT callback. Some servers seem /* Go to sleep waiting for GRANT callback. Some servers seem
* to lose callbacks, however, so we're going to poll from * to lose callbacks, however, so we're going to poll from
@ -68,23 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
* a 1 minute timeout would do. See the comment before * a 1 minute timeout would do. See the comment before
* nlmclnt_lock for an explanation. * nlmclnt_lock for an explanation.
*/ */
sleep_on_timeout(&block.b_wait, 30*HZ); ret = wait_event_interruptible_timeout(block->b_wait,
block->b_status != NLM_LCK_BLOCKED,
timeout);
list_del(&block.b_list); if (block->b_status != NLM_LCK_BLOCKED) {
req->a_res.status = block->b_status;
if (!signalled()) { block->b_status = NLM_LCK_BLOCKED;
*statp = block.b_status;
return 0;
} }
/* Okay, we were interrupted. Cancel the pending request return ret;
* unless the server has rebooted.
*/
if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
printk(KERN_NOTICE
"lockd: CANCEL call failed (errno %d)\n", -err);
return -ERESTARTSYS;
} }
/* /*
@ -94,27 +115,23 @@ u32
nlmclnt_grant(struct nlm_lock *lock) nlmclnt_grant(struct nlm_lock *lock)
{ {
struct nlm_wait *block; struct nlm_wait *block;
u32 res = nlm_lck_denied;
/* /*
* Look up blocked request based on arguments. * Look up blocked request based on arguments.
* Warning: must not use cookie to match it! * Warning: must not use cookie to match it!
*/ */
list_for_each_entry(block, &nlm_blocked, b_list) { list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_compare_locks(block->b_lock, &lock->fl)) if (nlm_compare_locks(block->b_lock, &lock->fl)) {
break; /* Alright, we found a lock. Set the return status
} * and wake up the caller
/* Ooops, no blocked request found. */
if (block == NULL)
return nlm_lck_denied;
/* Alright, we found the lock. Set the return status and
* wake up the caller.
*/ */
block->b_status = NLM_LCK_GRANTED; block->b_status = NLM_LCK_GRANTED;
wake_up(&block->b_wait); wake_up(&block->b_wait);
res = nlm_granted;
return nlm_granted; }
}
return res;
} }
/* /*

View file

@ -21,6 +21,7 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMDBG_FACILITY NLMDBG_CLIENT
#define NLMCLNT_GRACE_WAIT (5*HZ) #define NLMCLNT_GRACE_WAIT (5*HZ)
#define NLMCLNT_POLL_TIMEOUT (30*HZ)
static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_test(struct nlm_rqst *, struct file_lock *);
static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@ -553,6 +554,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{ {
struct nlm_host *host = req->a_host; struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res; struct nlm_res *resp = &req->a_res;
long timeout;
int status; int status;
if (!host->h_monitored && nsm_monitor(host) < 0) { if (!host->h_monitored && nsm_monitor(host) < 0) {
@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
goto out; goto out;
} }
do { if (req->a_args.block) {
if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { status = nlmclnt_prepare_block(req, host, fl);
if (resp->status != NLM_LCK_BLOCKED)
break;
status = nlmclnt_block(host, fl, &resp->status);
}
if (status < 0) if (status < 0)
goto out; goto out;
} while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); }
for(;;) {
status = nlmclnt_call(req, NLMPROC_LOCK);
if (status < 0)
goto out_unblock;
if (resp->status != NLM_LCK_BLOCKED)
break;
/* Wait on an NLM blocking lock */
timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
/* Did a reclaimer thread notify us of a server reboot? */
if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD)
continue;
if (resp->status != NLM_LCK_BLOCKED)
break;
if (timeout >= 0)
continue;
/* We were interrupted. Send a CANCEL request to the server
* and exit
*/
status = (int)timeout;
goto out_unblock;
}
if (resp->status == NLM_LCK_GRANTED) { if (resp->status == NLM_LCK_GRANTED) {
fl->fl_u.nfs_fl.state = host->h_state; fl->fl_u.nfs_fl.state = host->h_state;
@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
do_vfs_lock(fl); do_vfs_lock(fl);
} }
status = nlm_stat_to_errno(resp->status); status = nlm_stat_to_errno(resp->status);
out_unblock:
nlmclnt_finish_block(req);
/* Cancel the blocked request if it is still pending */
if (resp->status == NLM_LCK_BLOCKED)
nlmclnt_cancel(host, fl);
out: out:
nlmclnt_release_lockargs(req); nlmclnt_release_lockargs(req);
return status; return status;

View file

@ -72,6 +72,8 @@ struct nlm_lockowner {
uint32_t pid; uint32_t pid;
}; };
struct nlm_wait;
/* /*
* Memory chunk for NLM client RPC request. * Memory chunk for NLM client RPC request.
*/ */
@ -81,6 +83,7 @@ struct nlm_rqst {
struct nlm_host * a_host; /* host handle */ struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */ struct nlm_args a_args; /* arguments */
struct nlm_res a_res; /* result */ struct nlm_res a_res; /* result */
struct nlm_wait * a_block;
char a_owner[NLMCLNT_OHSIZE]; char a_owner[NLMCLNT_OHSIZE];
}; };
@ -142,7 +145,9 @@ extern unsigned long nlmsvc_timeout;
* Lockd client functions * Lockd client functions
*/ */
struct nlm_rqst * nlmclnt_alloc_call(void); struct nlm_rqst * nlmclnt_alloc_call(void);
int nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *); int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
void nlmclnt_finish_block(struct nlm_rqst *req);
long nlmclnt_block(struct nlm_rqst *req, long timeout);
int nlmclnt_cancel(struct nlm_host *, struct file_lock *); int nlmclnt_cancel(struct nlm_host *, struct file_lock *);
u32 nlmclnt_grant(struct nlm_lock *); u32 nlmclnt_grant(struct nlm_lock *);
void nlmclnt_recovery(struct nlm_host *, u32); void nlmclnt_recovery(struct nlm_host *, u32);