kernel-fxtec-pro1x/fs/lockd/svclock.c
Trond Myklebust 7bab377fcb lockd: Don't expose the process pid to the NLM server
Instead we use the nlm_lockowner->pid.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-03-20 13:44:06 -05:00

689 lines
19 KiB
C

/*
* linux/fs/lockd/svclock.c
*
* Handling of server-side locks, mostly of the blocked variety.
* This is the ugliest part of lockd because we tread on very thin ice.
* GRANT and CANCEL calls may get stuck, meet in mid-flight, etc.
* IMNSHO introducing the grant callback into the NLM protocol was one
* of the worst ideas Sun ever had. Except maybe for the idea of doing
* NFS file locking at all.
*
* I'm trying hard to avoid race conditions by protecting most accesses
* to a file's list of blocked locks through a semaphore. The global
* list of blocked locks is not protected in this fashion however.
* Therefore, some functions (such as the RPC callback for the async grant
* call) move blocked locks towards the head of the list *while some other
* process might be traversing it*. This should not be a problem in
* practice, because this will only cause functions traversing the list
* to visit some blocks twice.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
#ifdef CONFIG_LOCKD_V4
#define nlm_deadlock nlm4_deadlock
#else
#define nlm_deadlock nlm_lck_denied
#endif
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
static int nlmsvc_remove_block(struct nlm_block *block);
static const struct rpc_call_ops nlmsvc_grant_ops;
/*
* The list of blocked locks to retry
*/
static struct nlm_block * nlm_blocked;
/*
* Insert a blocked lock into the global list
*/
static void
nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
{
struct nlm_block **bp, *b;
dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
if (block->b_queued)
nlmsvc_remove_block(block);
bp = &nlm_blocked;
if (when != NLM_NEVER) {
if ((when += jiffies) == NLM_NEVER)
when ++;
while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER)
bp = &b->b_next;
} else
while ((b = *bp) != 0)
bp = &b->b_next;
block->b_queued = 1;
block->b_when = when;
block->b_next = b;
*bp = block;
}
/*
* Remove a block from the global list
*/
static int
nlmsvc_remove_block(struct nlm_block *block)
{
struct nlm_block **bp, *b;
if (!block->b_queued)
return 1;
for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) {
if (b == block) {
*bp = block->b_next;
block->b_queued = 0;
return 1;
}
}
return 0;
}
/*
* Find a block for a given lock and optionally remove it from
* the list.
*/
static struct nlm_block *
nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
{
struct nlm_block **head, *block;
struct file_lock *fl;
dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
file, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
fl = &block->b_call.a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
block->b_file, fl->fl_pid,
(long long)fl->fl_start,
(long long)fl->fl_end, fl->fl_type,
nlmdbg_cookie2a(&block->b_call.a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
if (remove) {
*head = block->b_next;
block->b_queued = 0;
}
return block;
}
}
return NULL;
}
static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
{
if(a->len != b->len)
return 0;
if(memcmp(a->data,b->data,a->len))
return 0;
return 1;
}
/*
* Find a block with a given NLM cookie.
*/
static inline struct nlm_block *
nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
{
struct nlm_block *block;
for (block = nlm_blocked; block; block = block->b_next) {
dprintk("cookie: head of blocked queue %p, block %p\n",
nlm_blocked, block);
if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
&& nlm_cmp_addr(sin, &block->b_host->h_addr))
break;
}
return block;
}
/*
* Create a block and initialize it.
*
* Note: we explicitly set the cookie of the grant reply to that of
* the blocked lock request. The spec explicitly mentions that the client
* should _not_ rely on the callback containing the same cookie as the
* request, but (as I found out later) that's because some implementations
* do just this. Never mind the standards comittees, they support our
* logging industries.
*/
static inline struct nlm_block *
nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, struct nlm_cookie *cookie)
{
struct nlm_block *block;
struct nlm_host *host;
struct nlm_rqst *call;
/* Create host handle for callback */
host = nlmclnt_lookup_host(&rqstp->rq_addr,
rqstp->rq_prot, rqstp->rq_vers);
if (host == NULL)
return NULL;
/* Allocate memory for block, and initialize arguments */
if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
goto failed;
memset(block, 0, sizeof(*block));
locks_init_lock(&block->b_call.a_args.lock.fl);
locks_init_lock(&block->b_call.a_res.lock.fl);
if (!nlmclnt_setgrantargs(&block->b_call, lock))
goto failed_free;
/* Set notifier function for VFS, and init args */
block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
block->b_call.a_args.cookie = *cookie; /* see above */
dprintk("lockd: created block %p...\n", block);
/* Create and initialize the block */
block->b_daemon = rqstp->rq_server;
block->b_host = host;
block->b_file = file;
/* Add to file's list of blocks */
block->b_fnext = file->f_blocks;
file->f_blocks = block;
/* Set up RPC arguments for callback */
call = &block->b_call;
call->a_host = host;
call->a_flags = RPC_TASK_ASYNC;
return block;
failed_free:
kfree(block);
failed:
nlm_release_host(host);
return NULL;
}
/*
* Delete a block. If the lock was cancelled or the grant callback
* failed, unlock is set to 1.
* It is the caller's responsibility to check whether the file
* can be closed hereafter.
*/
static int
nlmsvc_delete_block(struct nlm_block *block, int unlock)
{
struct file_lock *fl = &block->b_call.a_args.lock.fl;
struct nlm_file *file = block->b_file;
struct nlm_block **bp;
int status = 0;
dprintk("lockd: deleting block %p...\n", block);
/* Remove block from list */
nlmsvc_remove_block(block);
if (unlock)
status = posix_unblock_lock(file->f_file, fl);
/* If the block is in the middle of a GRANT callback,
* don't kill it yet. */
if (block->b_incall) {
nlmsvc_insert_block(block, NLM_NEVER);
block->b_done = 1;
return status;
}
/* Remove block from file's list of blocks */
for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
if (*bp == block) {
*bp = block->b_fnext;
break;
}
}
if (block->b_host)
nlm_release_host(block->b_host);
nlmclnt_freegrantargs(&block->b_call);
kfree(block);
return status;
}
/*
* Loop over all blocks and perform the action specified.
* (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
*/
int
nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
{
struct nlm_block *block, *next;
/* XXX: Will everything get cleaned up if we don't unlock here? */
down(&file->f_sema);
for (block = file->f_blocks; block; block = next) {
next = block->b_fnext;
if (action == NLM_ACT_MARK)
block->b_host->h_inuse = 1;
else if (action == NLM_ACT_UNLOCK) {
if (host == NULL || host == block->b_host)
nlmsvc_delete_block(block, 1);
}
}
up(&file->f_sema);
return 0;
}
/*
* Attempt to establish a lock, and if it can't be granted, block it
* if required.
*/
u32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
{
struct file_lock *conflock;
struct nlm_block *block;
int error;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
/* Get existing block (in case client is busy-waiting) */
block = nlmsvc_lookup_block(file, lock, 0);
lock->fl.fl_flags |= FL_LOCKD;
again:
/* Lock file against concurrent access */
down(&file->f_sema);
if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
error = posix_lock_file(file->f_file, &lock->fl);
if (block)
nlmsvc_delete_block(block, 0);
up(&file->f_sema);
dprintk("lockd: posix_lock_file returned %d\n", -error);
switch(-error) {
case 0:
return nlm_granted;
case EDEADLK:
return nlm_deadlock;
case EAGAIN:
return nlm_lck_denied;
default: /* includes ENOLCK */
return nlm_lck_denied_nolocks;
}
}
if (!wait) {
up(&file->f_sema);
return nlm_lck_denied;
}
if (posix_locks_deadlock(&lock->fl, conflock)) {
up(&file->f_sema);
return nlm_deadlock;
}
/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
/* We have to release f_sema as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
if (block == NULL) {
up(&file->f_sema);
dprintk("lockd: blocking on this lock (allocating).\n");
if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
goto again;
}
/* Append to list of blocked */
nlmsvc_insert_block(block, NLM_NEVER);
if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
/* Now add block to block list of the conflicting lock
if we haven't done so. */
dprintk("lockd: blocking on this lock.\n");
posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
}
up(&file->f_sema);
return nlm_lck_blocked;
}
/*
* Test for presence of a conflicting lock.
*/
u32
nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
struct nlm_lock *conflock)
{
struct file_lock *fl;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
fl->fl_type, (long long)fl->fl_start,
(long long)fl->fl_end);
conflock->caller = "somehost"; /* FIXME */
conflock->oh.len = 0; /* don't return OH info */
conflock->svid = fl->fl_pid;
conflock->fl = *fl;
return nlm_lck_denied;
}
return nlm_granted;
}
/*
* Remove a lock.
* This implies a CANCEL call: We send a GRANT_MSG, the client replies
* with a GRANT_RES call which gets lost, and calls UNLOCK immediately
* afterwards. In this case the block will still be there, and hence
* must be removed.
*/
u32
nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
{
int error;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
/* First, cancel any lock that might be there */
nlmsvc_cancel_blocked(file, lock);
lock->fl.fl_type = F_UNLCK;
error = posix_lock_file(file->f_file, &lock->fl);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
/*
* Cancel a previously blocked request.
*
* A cancel request always overrides any grant that may currently
* be in progress.
* The calling procedure must check whether the file can be closed.
*/
u32
nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
{
struct nlm_block *block;
int status = 0;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
down(&file->f_sema);
if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
status = nlmsvc_delete_block(block, 1);
up(&file->f_sema);
return status ? nlm_lck_denied : nlm_granted;
}
/*
* Unblock a blocked lock request. This is a callback invoked from the
* VFS layer when a lock on which we blocked is removed.
*
* This function doesn't grant the blocked lock instantly, but rather moves
* the block to the head of nlm_blocked where it can be picked up by lockd.
*/
static void
nlmsvc_notify_blocked(struct file_lock *fl)
{
struct nlm_block **bp, *block;
dprintk("lockd: VFS unblock notification for block %p\n", fl);
for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0);
svc_wake_up(block->b_daemon);
return;
}
}
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
}
struct lock_manager_operations nlmsvc_lock_operations = {
.fl_compare_owner = nlmsvc_same_owner,
.fl_notify = nlmsvc_notify_blocked,
};
/*
* Try to claim a lock that was previously blocked.
*
* Note that we use both the RPC_GRANTED_MSG call _and_ an async
* RPC thread when notifying the client. This seems like overkill...
* Here's why:
* - we don't want to use a synchronous RPC thread, otherwise
* we might find ourselves hanging on a dead portmapper.
* - Some lockd implementations (e.g. HP) don't react to
* RPC_GRANTED calls; they seem to insist on RPC_GRANTED_MSG calls.
*/
static void
nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call.a_args.lock;
struct file_lock *conflock;
int error;
dprintk("lockd: grant blocked lock %p\n", block);
/* First thing is lock the file */
down(&file->f_sema);
/* Unlink block request from list */
nlmsvc_remove_block(block);
/* If b_granted is true this means we've been here before.
* Just retry the grant callback, possibly refreshing the RPC
* binding */
if (block->b_granted) {
nlm_rebind_host(block->b_host);
goto callback;
}
/* Try the lock operation again */
if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
/* Bummer, we blocked again */
dprintk("lockd: lock still blocked\n");
nlmsvc_insert_block(block, NLM_NEVER);
posix_block_lock(conflock, &lock->fl);
up(&file->f_sema);
return;
}
/* Alright, no conflicting lock. Now lock it for real. If the
* following yields an error, this is most probably due to low
* memory. Retry the lock in a few seconds.
*/
if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
-error, __FUNCTION__);
nlmsvc_insert_block(block, 10 * HZ);
up(&file->f_sema);
return;
}
callback:
/* Lock was granted by VFS. */
dprintk("lockd: GRANTing blocked lock.\n");
block->b_granted = 1;
block->b_incall = 1;
/* Schedule next grant callback in 30 seconds */
nlmsvc_insert_block(block, 30 * HZ);
/* Call the client */
nlm_get_host(block->b_call.a_host);
if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
&nlmsvc_grant_ops) < 0)
nlm_release_host(block->b_call.a_host);
up(&file->f_sema);
}
/*
* This is the callback from the RPC layer when the NLM_GRANTED_MSG
* RPC call has succeeded or timed out.
* Like all RPC callbacks, it is invoked by the rpciod process, so it
* better not sleep. Therefore, we put the blocked lock on the nlm_blocked
* chain once more in order to have it removed by lockd itself (which can
* then sleep on the file semaphore without disrupting e.g. the nfs client).
*/
static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
{
struct nlm_rqst *call = data;
struct nlm_block *block;
unsigned long timeout;
struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client);
dprintk("lockd: GRANT_MSG RPC callback\n");
dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
nlmdbg_cookie2a(&call->a_args.cookie),
NIPQUAD(peer_addr->sin_addr.s_addr));
if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
nlmdbg_cookie2a(&call->a_args.cookie),
NIPQUAD(peer_addr->sin_addr.s_addr));
return;
}
/* Technically, we should down the file semaphore here. Since we
* move the block towards the head of the queue only, no harm
* can be done, though. */
if (task->tk_status < 0) {
/* RPC error: Re-insert for retransmission */
timeout = 10 * HZ;
} else if (block->b_done) {
/* Block already removed, kill it for real */
timeout = 0;
} else {
/* Call was successful, now wait for client callback */
timeout = 60 * HZ;
}
nlmsvc_insert_block(block, timeout);
svc_wake_up(block->b_daemon);
block->b_incall = 0;
nlm_release_host(call->a_host);
}
static const struct rpc_call_ops nlmsvc_grant_ops = {
.rpc_call_done = nlmsvc_grant_callback,
};
/*
* We received a GRANT_RES callback. Try to find the corresponding
* block.
*/
void
nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status)
{
struct nlm_block *block;
struct nlm_file *file;
dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n",
*(unsigned int *)(cookie->data),
ntohl(rqstp->rq_addr.sin_addr.s_addr), status);
if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr)))
return;
file = block->b_file;
file->f_count++;
down(&file->f_sema);
block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
if (block) {
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
/* Try again in a couple of seconds */
nlmsvc_insert_block(block, 10 * HZ);
up(&file->f_sema);
} else {
/* Lock is now held by client, or has been rejected.
* In both cases, the block should be removed. */
up(&file->f_sema);
if (status == NLM_LCK_GRANTED)
nlmsvc_delete_block(block, 0);
else
nlmsvc_delete_block(block, 1);
}
}
nlm_release_file(file);
}
/*
* Retry all blocked locks that have been notified. This is where lockd
* picks up locks that can be granted, or grant notifications that must
* be retransmitted.
*/
unsigned long
nlmsvc_retry_blocked(void)
{
struct nlm_block *block;
dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
nlm_blocked,
nlm_blocked? nlm_blocked->b_when : 0);
while ((block = nlm_blocked) != 0) {
if (block->b_when == NLM_NEVER)
break;
if (time_after(block->b_when,jiffies))
break;
dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
block, block->b_when, block->b_done);
if (block->b_done)
nlmsvc_delete_block(block, 0);
else
nlmsvc_grant_blocked(block);
}
if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
return (block->b_when - jiffies);
return MAX_SCHEDULE_TIMEOUT;
}