From adae0fe0ea87e8fb1a72dde304937c60759b495f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 5 Apr 2012 21:04:37 +0400 Subject: [PATCH 01/11] SUNRPC: register PipeFS file system after pernet sybsystem PipeFS superblock creation routine relays on SUNRPC pernet data presense, which is created on register_pernet_subsys() call in SUNRPC module init function. Registering of PipeFS filesystem prior to registering of per-net subsystem leads to races (mount of PipeFS can dereference uninitialized data). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/sunrpc_syms.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8adfc88e793a..3d6498af9adc 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -75,19 +75,20 @@ static struct pernet_operations sunrpc_net_ops = { static int __init init_sunrpc(void) { - int err = register_rpc_pipefs(); + int err = rpc_init_mempool(); if (err) goto out; - err = rpc_init_mempool(); - if (err) - goto out2; err = rpcauth_init_module(); if (err) - goto out3; + goto out2; cache_initialize(); err = register_pernet_subsys(&sunrpc_net_ops); + if (err) + goto out3; + + err = register_rpc_pipefs(); if (err) goto out4; #ifdef RPC_DEBUG @@ -98,11 +99,11 @@ init_sunrpc(void) return 0; out4: - rpcauth_remove_module(); + unregister_pernet_subsys(&sunrpc_net_ops); out3: - rpc_destroy_mempool(); + rpcauth_remove_module(); out2: - unregister_rpc_pipefs(); + rpc_destroy_mempool(); out: return err; } From ca138f368a36dba40d3ef4a53d64af2011cda3c7 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 5 Apr 2012 15:26:36 -0400 Subject: [PATCH 02/11] NFS: check for req==NULL in nfs_try_to_update_request cleanup Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2c68818f68ac..9b8d4d42a8af 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -682,7 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); - nfs_clear_request_commit(req); + if (req) + nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); From 05ffe24f5290dc095f98fbaf84afe51ef404ccc5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Apr 2012 12:20:10 -0400 Subject: [PATCH 03/11] NFSv4: Ensure that the LOCK code sets exception->inode All callers of nfs4_handle_exception() that need to handle NFS4ERR_OPENMODE correctly should set exception->inode Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f82bde005a82..3c787d02131b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4558,7 +4558,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .inode = state->inode, + }; int err; do { @@ -4576,7 +4578,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .inode = state->inode, + }; int err; err = nfs4_set_lock_state(state, request); @@ -4676,6 +4680,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * { struct nfs4_exception exception = { .state = state, + .inode = state->inode, }; int err; From 55725513b5ef9d462aa3e18527658a0362aaae83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Apr 2012 12:48:35 -0400 Subject: [PATCH 04/11] NFSv4: Ensure that we check lock exclusive/shared type against open modes Since we may be simulating flock() locks using NFS byte range locks, we can't rely on the VFS having checked the file open mode for us. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3c787d02131b..ba837d977a1a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4726,6 +4726,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; + /* + * Don't rely on the VFS having checked the file open mode, + * since it won't do this for flock() locks. + */ + switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + do { status = nfs4_proc_setlk(state, cmd, request); if ((status != -EAGAIN) || IS_SETLK(cmd)) From 451146be933e26e21277852b5e40c6a52266ef96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Apr 2012 16:29:11 -0400 Subject: [PATCH 05/11] NFSv4: Fix open(O_TRUNC) and ftruncate() error handling If the file wasn't opened for writing, then truncate and ftruncate need to report the appropriate errors. Reported-by: Miklos Szeredi Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/dir.c | 4 ++-- fs/nfs/nfs4proc.c | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4aaf0316d76a..8789210c6905 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1429,7 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } open_flags = nd->intent.open.flags; - attr.ia_valid = 0; + attr.ia_valid = ATTR_OPEN; ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); @@ -1536,7 +1536,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if (IS_ERR(ctx)) goto out; - attr.ia_valid = 0; + attr.ia_valid = ATTR_OPEN; if (openflags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ba837d977a1a..f875cf305237 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1954,10 +1954,19 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_do_setattr(inode, cred, fattr, sattr, state), - &exception); + err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); + switch (err) { + case -NFS4ERR_OPENMODE: + if (state && !(state->state & FMODE_WRITE)) { + err = -EBADF; + if (sattr->ia_valid & ATTR_OPEN) + err = -EACCES; + goto out; + } + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); +out: return err; } From 73fb7bc7c57d971b11f2e00536ac2d3e316e0609 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:34 -0400 Subject: [PATCH 06/11] NFS: put open context on error in nfs_pagein_multi Cc: Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9a0e8ef4a409..0a4be28c2ea3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -322,7 +322,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head while (!list_empty(res)) { data = list_entry(res->next, struct nfs_read_data, list); list_del(&data->list); - nfs_readdata_free(data); + nfs_readdata_release(data); } nfs_readpage_release(req); return -ENOMEM; From 8ccd271f7a3a846ce6f85ead0760d9d12994a611 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:35 -0400 Subject: [PATCH 07/11] NFS: put open context on error in nfs_flush_multi Cc: Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9b8d4d42a8af..c07462320f6b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1019,7 +1019,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head while (!list_empty(res)) { data = list_entry(res->next, struct nfs_write_data, list); list_del(&data->list); - nfs_writedata_free(data); + nfs_writedata_release(data); } nfs_redirty_request(req); return -ENOMEM; From 98a2139f4f4d7b5fcc3a54c7fddbe88612abed20 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 3 Sep 2011 01:09:43 +0200 Subject: [PATCH 08/11] nfs: Enclose hostname in brackets when needed in nfs_do_root_mount When hostname contains colon (e.g. when it is an IPv6 address) it needs to be enclosed in brackets to make parsing of NFS device string possible. Fix nfs_do_root_mount() to enclose hostname properly when needed. NFS code actually does not need this as it does not parse the string passed by nfs_do_root_mount() but the device string is exposed to userspace in /proc/mounts. CC: Josh Boyer CC: Trond Myklebust Signed-off-by: Jan Kara Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 37412f706b32..1e6715f0616c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2767,11 +2767,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, char *root_devname; size_t len; - len = strlen(hostname) + 3; + len = strlen(hostname) + 5; root_devname = kmalloc(len, GFP_KERNEL); if (root_devname == NULL) return ERR_PTR(-ENOMEM); - snprintf(root_devname, len, "%s:/", hostname); + /* Does hostname needs to be enclosed in brackets? */ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); kfree(root_devname); return root_mnt; From 95b72eb0bdef6476b7e73061f0382adf46c5495a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2012 19:24:51 -0400 Subject: [PATCH 09/11] NFSv4: Ensure we do not reuse open owner names The NFSv4 spec is ambiguous about whether or not it is permissible to reuse open owner names, so play it safe. This patch adds a timestamp to the state_owner structure, and combines that with the IDA based uniquifier. Fixes a regression whereby the Linux server returns NFS4ERR_BAD_SEQID. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4state.c | 1 + fs/nfs/nfs4xdr.c | 9 +++++---- include/linux/nfs_xdr.h | 7 ++++++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 97ecc863dd76..b6db9e33fb7b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -59,6 +59,7 @@ struct nfs_unique_id { #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { + ktime_t create_time; int owner_id; int flags; u32 counter; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f875cf305237..60d5f4c26dda 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -838,7 +838,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_seqid.owner_id; + p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); + p->o_arg.id.uniquifier = sp->so_seqid.owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; @@ -1466,8 +1467,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) goto unlock_no_action; rcu_read_unlock(); } - /* Update sequence id. */ - data->o_arg.id = sp->so_seqid.owner_id; + /* Update client id. */ data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0f43414eb25a..3b07f094f3a9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -393,6 +393,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) static void nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) { + sc->create_time = ktime_get(); sc->flags = 0; sc->counter = 0; spin_lock_init(&sc->lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c74fdb114b48..77fc5f959c4e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -74,7 +74,7 @@ static int nfs4_stat_to_errno(int); /* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define open_owner_id_maxsz (1 + 1 + 4) +#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) #define lock_owner_id_maxsz (1 + 1 + 4) #define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) @@ -1340,12 +1340,13 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena */ encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); - p = reserve_space(xdr, 32); + p = reserve_space(xdr, 36); p = xdr_encode_hyper(p, arg->clientid); - *p++ = cpu_to_be32(20); + *p++ = cpu_to_be32(24); p = xdr_encode_opaque_fixed(p, "open id:", 8); *p++ = cpu_to_be32(arg->server->s_dev); - xdr_encode_hyper(p, arg->id); + *p++ = cpu_to_be32(arg->id.uniquifier); + xdr_encode_hyper(p, arg->id.create_time); } static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bfd0d1bf6707..7ba3551a0414 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -312,6 +312,11 @@ struct nfs4_layoutreturn { int rpc_status; }; +struct stateowner_id { + __u64 create_time; + __u32 uniquifier; +}; + /* * Arguments to the open call. */ @@ -321,7 +326,7 @@ struct nfs_openargs { int open_flags; fmode_t fmode; __u64 clientid; - __u64 id; + struct stateowner_id id; union { struct { struct iattr * attrs; /* UNCHECKED, GUARDED */ From c77365c963cf8703ecf1004cd3b298068a3e1b76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Apr 2012 12:31:05 -0400 Subject: [PATCH 10/11] NFSv4: Ensure that we don't drop a state owner more than once Retest the RB_EMPTY_NODE() condition under the spin lock to ensure that we don't call rb_erase() more than once on the same state owner. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3b07f094f3a9..b300fb840b21 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -435,13 +435,17 @@ nfs4_alloc_state_owner(struct nfs_server *server, static void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - if (!RB_EMPTY_NODE(&sp->so_server_node)) { + struct rb_node *rb_node = &sp->so_server_node; + + if (!RB_EMPTY_NODE(rb_node)) { struct nfs_server *server = sp->so_server; struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); - rb_erase(&sp->so_server_node, &server->state_owners); - RB_CLEAR_NODE(&sp->so_server_node); + if (!RB_EMPTY_NODE(rb_node)) { + rb_erase(rb_node, &server->state_owners); + RB_CLEAR_NODE(rb_node); + } spin_unlock(&clp->cl_lock); } } From 7bf97bc27308cfdc7a8dadd40ae50f7c4cb09b01 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Apr 2012 12:36:19 -0400 Subject: [PATCH 11/11] NFSv4: Keep dropped state owners on the LRU list for a while To ensure that we don't reuse their identifiers. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b300fb840b21..7f0fcfc1fe9d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -521,6 +521,14 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, /** * nfs4_put_state_owner - Release a nfs4_state_owner * @sp: state owner data to release + * + * Note that we keep released state owners on an LRU + * list. + * This caches valid state owners so that they can be + * reused, to avoid the OPEN_CONFIRM on minor version 0. + * It also pins the uniquifier of dropped state owners for + * a while, to ensure that those state owner names are + * never reused. */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { @@ -530,15 +538,9 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - if (!RB_EMPTY_NODE(&sp->so_server_node)) { - sp->so_expires = jiffies; - list_add_tail(&sp->so_lru, &server->state_owners_lru); - spin_unlock(&clp->cl_lock); - } else { - nfs4_remove_state_owner_locked(sp); - spin_unlock(&clp->cl_lock); - nfs4_free_state_owner(sp); - } + sp->so_expires = jiffies; + list_add_tail(&sp->so_lru, &server->state_owners_lru); + spin_unlock(&clp->cl_lock); } /**