From 1132b26029918aa8fb5ba24a81b5c234e61f356c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 13 Aug 2010 17:31:16 -0400 Subject: [PATCH 01/99] nfsd: remove duplicate NFS4_STATEID_SIZE declaration Use NFS4_STATEID_SIZE from include/linux/nfs4 Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 988cbb3a19b6..014482c4e57d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -41,7 +41,6 @@ #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 -#define NFS4_STATEID_SIZE 16 /* Index of predefined Linux callback client operations */ From 17cebf658e088935d4bdebfc7ad9800e9fc4a0b2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: [PATCH 02/99] sunrpc: extract some common sunrpc_cache code from nfsd Rather than duplicating this idiom twice, put it in an inline function. This reduces the usage of 'expiry_time' outside the sunrpc/cache.c code and thus the impact of a change that is about to be made to that field. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 9 +++------ include/linux/sunrpc/cache.h | 6 ++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71d87dd..e56827b88fd2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -935,10 +935,9 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) @@ -973,10 +972,9 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } /* @@ -1097,8 +1095,7 @@ exp_export(struct nfsctl_export *nxp) static void exp_do_unexport(svc_export *unexp) { - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); + sunrpc_invalidate(&unexp->h, &svc_export_cache); exp_unhash(unexp); exp_fsid_unhash(unexp); } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84b92f4..0e1febf4e5bc 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -228,4 +228,10 @@ static inline time_t get_expiry(char **bpp) return rv; } +static inline void sunrpc_invalidate(struct cache_head *h, + struct cache_detail *detail) +{ + h->expiry_time = get_seconds() - 1; + detail->nextcheck = get_seconds(); +} #endif /* _LINUX_SUNRPC_CACHE_H_ */ From c5b29f885afe890f953f7f23424045cdad31d3e4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: [PATCH 03/99] sunrpc: use seconds since boot in expiry cache This protects us from confusion when the wallclock 
time changes. We convert to and from wallclock when setting or reading expiry times. Also use seconds since boot for last_close time. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 6 +++--- fs/nfsd/nfs4idmap.c | 2 +- include/linux/sunrpc/cache.h | 28 +++++++++++++++++++++++++--- net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625db..a6e711ad130f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf493424..808b33a4a090 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -550,7 +550,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || detail->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/include/linux/sunrpc/cache.h 
b/include/linux/sunrpc/cache.h index 0e1febf4e5bc..ece432b7f87f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -218,20 +218,42 @@ static inline int get_int(char **bpp, int *anint) return 0; } +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. + */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + static inline time_t get_expiry(char **bpp) { int rv; + struct timespec boot; + if (get_int(bpp, &rv)) return 0; if (rv < 0) return 0; - return rv; + getboottime(&boot); + return rv - boot.tv_sec; } static inline void sunrpc_invalidate(struct cache_head *h, struct cache_detail *detail) { - h->expiry_time = get_seconds() - 1; - detail->nextcheck = get_seconds(); + h->expiry_time = seconds_since_boot() - 1; + detail->nextcheck = seconds_since_boot(); } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2b06410e584e..8dc121955fdc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { - time_t now = get_seconds(); + time_t now = seconds_since_boot(); h->next = NULL; h->flags = 0; kref_init(&h->ref); @@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < get_seconds()) || + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time > h->last_refresh); } @@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry) { 
head->expiry_time = expiry; - head->last_refresh = get_seconds(); + head->last_refresh = seconds_since_boot(); set_bit(CACHE_VALID, &head->flags); } @@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail, /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); - age = get_seconds() - h->last_refresh; + age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) @@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail, cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); cache_fresh_unlocked(h, detail); rv = -ENOENT; } @@ -388,11 +388,11 @@ static int cache_clean(void) return -1; } current_detail = list_entry(next, struct cache_detail, others); - if (current_detail->nextcheck > get_seconds()) + if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; - current_detail->nextcheck = get_seconds()+30*60; + current_detail->nextcheck = seconds_since_boot()+30*60; } } @@ -477,7 +477,7 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { detail->flush_time = LONG_MAX; - detail->nextcheck = get_seconds(); + detail->nextcheck = seconds_since_boot(); cache_flush(); detail->flush_time = 1; } @@ -902,7 +902,7 @@ static int cache_release(struct inode *inode, struct file *filp, filp->private_data = NULL; kfree(rp); - cd->last_close = get_seconds(); + cd->last_close = seconds_since_boot(); atomic_dec(&cd->readers); } module_put(cd->owner); @@ -1034,7 +1034,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, int len; if (atomic_read(&detail->readers) == 0 && - detail->last_close < get_seconds() - 30) { + detail->last_close < seconds_since_boot() - 30) { warn_no_listener(detail); return -EINVAL; } @@ -1219,7 +1219,8 @@ static int 
c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); + convert_to_wallclock(cp->expiry_time), + atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ @@ -1285,7 +1286,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", cd->flush_time); + sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; @@ -1303,19 +1304,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *ep; - long flushtime; + char *bp, *ep; + if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; - flushtime = simple_strtoul(tbuf, &ep, 0); + simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; - cd->flush_time = flushtime; - cd->nextcheck = get_seconds(); + bp = tbuf; + cd->flush_time = get_expiry(&bp); + cd->nextcheck = seconds_since_boot(); cache_flush(); *ppos += count; From f16b6e8d838b2e2bb4561201311c66ac02ad67df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:06 +1000 Subject: [PATCH 04/99] sunrpc/cache: allow threads to block while waiting for cache update. The current practice of waiting for cache updates by queueing the whole request to be retried has (at least) two problems. 1/ With NFSv4, requests can be quite complex and re-trying a whole request when a later part fails should only be a last-resort, not a normal practice. 2/ Large requests, and in particular any 'write' request, will not be queued by the current code and doing so would be undesirable. In many cases only a very short wait is needed before the cache gets valid data. 
So, providing the underlying transport permits it by setting ->thread_wait, arrange to wait briefly for an upcall to be completed (as reflected in the clearing of CACHE_PENDING). If the short wait was not long enough and CACHE_PENDING is still set, fall back on the old approach. The 'thread_wait' value is set to 5 seconds when there are spare threads, and 1 second when there are no spare threads. These values are probably much higher than needed, but will ensure some forward progress. Note that as we only request an update for a non-valid item, and as non-valid items are updated in place it is extremely unlikely that cache_check will return -ETIMEDOUT. Normally cache_defer_req will sleep for a short while and then find that the item is_valid. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 3 ++ net/sunrpc/cache.c | 59 +++++++++++++++++++++++++++++++++++- net/sunrpc/svc_xprt.c | 11 +++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ece432b7f87f..52a7d7224e90 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,6 +125,9 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. 
+ */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8dc121955fdc..2c5297f245b4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); + struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { /* too much in the cache, randomly drop this one, @@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) if (net_random()&1) return -ENOMEM; } - dreq = req->defer(req); + if (req->thread_wait) { + dreq = &sleeper.handle; + sleeper.completion = + COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + } else + dreq = req->defer(req); + + retry: if (dreq == NULL) return -ENOMEM; @@ -555,6 +575,43 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + + if (dreq == &sleeper.handle) { + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + list_del_init(&sleeper.handle.recent); + list_del_init(&sleeper.handle.hash); + cache_defer_cnt--; + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from 
the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + dreq = req->defer(req); + goto retry; + } + /* only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; + } return 0; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc084939dd8..8ff6840866fa 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queue, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); From 6610f720e9e8103c22d1f1ccf8fbb695550a571f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 26 Aug 2010 13:19:52 -0400 Subject: [PATCH 05/99] svcrpc: minor cache cleanup Pull out some code into helper functions, fix a typo. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 44 +++++++++++++++++++++++-------------------- net/sunrpc/svc_xprt.c | 2 +- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2c5297f245b4..18e5e8e6f622 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -520,10 +520,26 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) complete(&dr->completion); } +static void __unhash_deferred_req(struct cache_deferred_req *dreq) +{ + list_del_init(&dreq->recent); + list_del_init(&dreq->hash); + cache_defer_cnt--; +} + +static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) +{ + int hash = DFR_HASH(item); + + list_add(&dreq->recent, &cache_defer_list); + if (cache_defer_hash[hash].next == NULL) + INIT_LIST_HEAD(&cache_defer_hash[hash]); + list_add(&dreq->hash, &cache_defer_hash[hash]); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; - int hash = DFR_HASH(item); struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { @@ -549,20 +565,14 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) spin_lock(&cache_defer_lock); - list_add(&dreq->recent, &cache_defer_list); - - if (cache_defer_hash[hash].next == NULL) - INIT_LIST_HEAD(&cache_defer_hash[hash]); - list_add(&dreq->hash, &cache_defer_hash[hash]); + __hash_deferred_req(dreq, item); /* it is in, now maybe clean up */ discard = NULL; if (++cache_defer_cnt > DFR_MAX) { discard = list_entry(cache_defer_list.prev, struct cache_deferred_req, recent); - list_del_init(&discard->recent); - list_del_init(&discard->hash); - cache_defer_cnt--; + __unhash_deferred_req(discard); } spin_unlock(&cache_defer_lock); @@ -584,9 +594,7 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) */ spin_lock(&cache_defer_lock); if (!list_empty(&sleeper.handle.hash)) { - 
list_del_init(&sleeper.handle.recent); - list_del_init(&sleeper.handle.hash); - cache_defer_cnt--; + __unhash_deferred_req(&sleeper.handle); spin_unlock(&cache_defer_lock); } else { /* cache_revisit_request already removed @@ -632,9 +640,8 @@ static void cache_revisit_request(struct cache_head *item) dreq = list_entry(lp, struct cache_deferred_req, hash); lp = lp->next; if (dreq->item == item) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); } } } @@ -657,11 +664,8 @@ void cache_clean_deferred(void *owner) spin_lock(&cache_defer_lock); list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { - if (dreq->owner == owner) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; - } + if (dreq->owner == owner) + __unhash_deferred_req(dreq); } spin_unlock(&cache_defer_lock); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8ff6840866fa..95fc3e8c51d6 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -665,7 +665,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request - * had to be queue, don't allow the thread to wait so + * had to be queued, don't allow the thread to wait so * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; From 3211af1119174fbe8b676422b74870cdd51d7314 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 26 Aug 2010 16:56:23 -0400 Subject: [PATCH 06/99] svcrpc: cache deferral cleanup Attempt to make obvious the first-try-sleeping-then-try-deferral logic by putting that logic into a top-level function that calls helpers. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 151 +++++++++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 68 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 18e5e8e6f622..da872f9fe1e0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,17 +509,6 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -struct thread_deferred_req { - struct cache_deferred_req handle; - struct completion completion; -}; -static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) -{ - struct thread_deferred_req *dr = - container_of(dreq, struct thread_deferred_req, handle); - complete(&dr->completion); -} - static void __unhash_deferred_req(struct cache_deferred_req *dreq) { list_del_init(&dreq->recent); @@ -537,29 +526,9 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he list_add(&dreq->hash, &cache_defer_hash[hash]); } -static int cache_defer_req(struct cache_req *req, struct cache_head *item) +static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) { - struct cache_deferred_req *dreq, *discard; - struct thread_deferred_req sleeper; - - if (cache_defer_cnt >= DFR_MAX) { - /* too much in the cache, randomly drop this one, - * or continue and drop the oldest below - */ - if (net_random()&1) - return -ENOMEM; - } - if (req->thread_wait) { - dreq = &sleeper.handle; - sleeper.completion = - COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); - dreq->revisit = cache_restart_thread; - } else - dreq = req->defer(req); - - retry: - if (dreq == NULL) - return -ENOMEM; + struct cache_deferred_req *discard; dreq->item = item; @@ -585,44 +554,90 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } - - if (dreq == &sleeper.handle) { - if (wait_for_completion_interruptible_timeout( - &sleeper.completion, req->thread_wait) <= 0) { 
- /* The completion wasn't completed, so we need - * to clean up - */ - spin_lock(&cache_defer_lock); - if (!list_empty(&sleeper.handle.hash)) { - __unhash_deferred_req(&sleeper.handle); - spin_unlock(&cache_defer_lock); - } else { - /* cache_revisit_request already removed - * this from the hash table, but hasn't - * called ->revisit yet. It will very soon - * and we need to wait for it. - */ - spin_unlock(&cache_defer_lock); - wait_for_completion(&sleeper.completion); - } - } - if (test_bit(CACHE_PENDING, &item->flags)) { - /* item is still pending, try request - * deferral - */ - dreq = req->defer(req); - goto retry; - } - /* only return success if we actually deferred the - * request. In this case we waited until it was - * answered so no deferral has happened - rather - * an answer already exists. - */ - return -EEXIST; - } return 0; } +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; + +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + +static int cache_wait_req(struct cache_req *req, struct cache_head *item) +{ + struct thread_deferred_req sleeper; + struct cache_deferred_req *dreq = &sleeper.handle; + int ret; + + sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + + ret = setup_deferral(dreq, item); + if (ret) + return ret; + + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + __unhash_deferred_req(&sleeper.handle); + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. 
It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + return -ETIMEDOUT; + } + /* only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; +} + +static int cache_defer_req(struct cache_req *req, struct cache_head *item) +{ + struct cache_deferred_req *dreq; + int ret; + + if (cache_defer_cnt >= DFR_MAX) { + /* too much in the cache, randomly drop this one, + * or continue and drop the oldest + */ + if (net_random()&1) + return -ENOMEM; + } + if (req->thread_wait) { + ret = cache_wait_req(req, item); + if (ret != -ETIMEDOUT) + return ret; + } + dreq = req->defer(req); + if (dreq == NULL) + return -ENOMEM; + return setup_deferral(dreq, item); +} + static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; From 06497524589f2a7717da33969d541674e0a27da6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 19 Sep 2010 22:55:06 -0400 Subject: [PATCH 07/99] nfsd4: fix hang on fast-booting nfs servers The last_close field of a cache_detail is initialized to zero, so the condition detail->last_close < seconds_since_boot() - 30 may be false even for a cache that was never opened. However, we want to immediately fail upcalls to caches that were never opened: in the case of the auth_unix_gid cache, especially, which may never be opened by mountd (if the --manage-gids option is not set), we want to fail the upcall immediately. Otherwise client requests will be dropped unnecessarily on reboot. Also document these conditions. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index da872f9fe1e0..ca7c621cd975 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1091,6 +1091,23 @@ static void warn_no_listener(struct cache_detail *detail) } } +static bool cache_listeners_exist(struct cache_detail *detail) +{ + if (atomic_read(&detail->readers)) + return true; + if (detail->last_close == 0) + /* This cache was never opened */ + return false; + if (detail->last_close < seconds_since_boot() - 30) + /* + * We allow for the possibility that someone might + * restart a userspace daemon without restarting the + * server; but after 30 seconds, we give up. + */ + return false; + return true; +} + /* * register an upcall request to user-space and queue it up for read() by the * upcall daemon. @@ -1109,10 +1126,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, char *bp; int len; - if (atomic_read(&detail->readers) == 0 && - detail->last_close < seconds_since_boot() - 30) { - warn_no_listener(detail); - return -EINVAL; + if (!cache_listeners_exist(detail)) { + warn_no_listener(detail); + return -EINVAL; } buf = kmalloc(PAGE_SIZE, GFP_KERNEL); From 1ebede86b8abbcf8833830e18e05391758cf2f28 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:07 +1000 Subject: [PATCH 08/99] sunrpc: close connection when a request is irretrievably lost. If we drop a request in the sunrpc layer, either due to kmalloc failure, or due to a cache miss when we could not queue the request for later replay, then close the connection to encourage the client to retry sooner. Note that if the drop happens in the NFS layer, NFSERR_JUKEBOX (aka NFS4ERR_DELAY) is returned to guide the client concerning replay. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svcauth.h | 10 +++++++--- net/sunrpc/auth_gss/svcauth_gss.c | 12 ++++++------ net/sunrpc/svc.c | 3 +++ net/sunrpc/svcauth_unix.c | 11 ++++++++--- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index d39dbdc7b10f..11266935e2d6 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -108,9 +108,13 @@ struct auth_ops { #define SVC_NEGATIVE 4 #define SVC_OK 5 #define SVC_DROP 6 -#define SVC_DENIED 7 -#define SVC_PENDING 8 -#define SVC_COMPLETE 9 +#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely + * lost so if there is a tcp connection, it + * should be closed + */ +#define SVC_DENIED 8 +#define SVC_PENDING 9 +#define SVC_COMPLETE 10 extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index cc385b3a59c2..ed005af3ef5d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) if (rqstp->rq_gssclient == NULL) return SVC_DENIED; stat = svcauth_unix_set_client(rqstp); - if (stat == SVC_DROP) + if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; return SVC_OK; } @@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, return SVC_DENIED; memset(&rsikey, 0, sizeof(rsikey)); if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) - return SVC_DROP; + return SVC_CLOSE; *authp = rpc_autherr_badverf; if (svc_safe_getnetobj(argv, &tmpobj)) { kfree(rsikey.in_handle.data); @@ -1026,22 +1026,22 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, } if (dup_netobj(&rsikey.in_token, &tmpobj)) { kfree(rsikey.in_handle.data); - return SVC_DROP; + return SVC_CLOSE; } /* Perform upcall, or find upcall result: */ rsip = rsi_lookup(&rsikey); rsi_free(&rsikey); if (!rsip) - return SVC_DROP; + return SVC_CLOSE; 
switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { case -EAGAIN: case -ETIMEDOUT: case -ENOENT: /* No upcall result: */ - return SVC_DROP; + return SVC_CLOSE; case 0: - ret = SVC_DROP; + ret = SVC_CLOSE; /* Got an answer to the upcall; use it: */ if (gss_write_init_verf(rqstp, rsip)) goto out; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d9017d64597e..6359c42c4941 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_bad; case SVC_DENIED: goto err_bad_auth; + case SVC_CLOSE: + if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + svc_close_xprt(rqstp->rq_xprt); case SVC_DROP: goto dropit; case SVC_COMPLETE: diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 207311610988..e91b550bc836 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -674,6 +674,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); + case -ETIMEDOUT: + return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); cache_put(&ug->h, &unix_gid_cache); @@ -720,8 +722,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { default: BUG(); - case -EAGAIN: case -ETIMEDOUT: + return SVC_CLOSE; + case -EAGAIN: return SVC_DROP; case -ENOENT: return SVC_DENIED; @@ -736,6 +739,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) switch (PTR_ERR(gi)) { case -EAGAIN: return SVC_DROP; + case -ESHUTDOWN: + return SVC_CLOSE; case -ENOENT: break; default: @@ -776,7 +781,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) cred->cr_gid = (gid_t) -1; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) - return SVC_DROP; /* kmalloc failure - client must retry */ + return SVC_CLOSE; /* kmalloc failure - client must retry */ /* Put NULL verifier */ svc_putnl(resv, 
RPC_AUTH_NULL); @@ -840,7 +845,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) - return SVC_DROP; + return SVC_CLOSE; for (i = 0; i < slen; i++) GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { From 8ff30fa4eff2ff9e207961c654caa093f0c84873 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:07 +1000 Subject: [PATCH 09/99] nfsd: disable deferral for NFSv4 Now that a slight delay in getting a reply to an upcall doesn't require deferring of requests, disable request deferral for all NFSv4 requests - the concept doesn't really fit with the v4 model. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59ec449b0c7f..0cdfd022bb7b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->cstate.session = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); - /* Use the deferral mechanism only for NFSv4.0 compounds */ - rqstp->rq_usedeferral = (args->minorversion == 0); + /* + * Don't use the deferral mechanism for NFSv4; compounds make it + * too hard to avoid non-idempotency problems. + */ + rqstp->rq_usedeferral = 0; /* * According to RFC3010, this takes precedence over all other errors. From 839049a8732d689d02051e0198fb60a22f7ccb4b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:06 +1000 Subject: [PATCH 10/99] nfsd/idmap: drop special request deferal in favour of improved default. The idmap code manages request deferral by waiting for a reply from userspace rather than putting the NFS request on a queue to be retried from the start. 
Now that the common deferral code does this there is no need for the special code in idmap. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4idmap.c | 105 +++++--------------------------------------- 1 file changed, 11 insertions(+), 94 deletions(-) diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 808b33a4a090..f0695e815f0e 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void) cache_unregister(&nametoid_cache); } -/* - * Deferred request handling - */ - -struct idmap_defer_req { - struct cache_req req; - struct cache_deferred_req deferred_req; - wait_queue_head_t waitq; - atomic_t count; -}; - -static inline void -put_mdr(struct idmap_defer_req *mdr) -{ - if (atomic_dec_and_test(&mdr->count)) - kfree(mdr); -} - -static inline void -get_mdr(struct idmap_defer_req *mdr) -{ - atomic_inc(&mdr->count); -} - -static void -idmap_revisit(struct cache_deferred_req *dreq, int toomany) -{ - struct idmap_defer_req *mdr = - container_of(dreq, struct idmap_defer_req, deferred_req); - - wake_up(&mdr->waitq); - put_mdr(mdr); -} - -static struct cache_deferred_req * -idmap_defer(struct cache_req *req) -{ - struct idmap_defer_req *mdr = - container_of(req, struct idmap_defer_req, req); - - mdr->deferred_req.revisit = idmap_revisit; - get_mdr(mdr); - return (&mdr->deferred_req); -} - -static inline int -do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, - struct cache_detail *detail, struct ent **item, - struct idmap_defer_req *mdr) -{ - *item = lookup_fn(key); - if (!*item) - return -ENOMEM; - return cache_check(detail, &(*item)->h, &mdr->req); -} - -static inline int -do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), - struct ent *key, struct cache_detail *detail, - struct ent **item) -{ - int ret = -ENOMEM; - - *item = lookup_fn(key); - if (!*item) - goto out_err; - ret = -ETIMEDOUT; - if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time <
seconds_since_boot() - || detail->flush_time > (*item)->h.last_refresh) - goto out_put; - ret = -ENOENT; - if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) - goto out_put; - return 0; -out_put: - cache_put(&(*item)->h, detail); -out_err: - *item = NULL; - return ret; -} - static int idmap_lookup(struct svc_rqst *rqstp, struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { - struct idmap_defer_req *mdr; int ret; - mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); - if (!mdr) + *item = lookup_fn(key); + if (!*item) return -ENOMEM; - atomic_set(&mdr->count, 1); - init_waitqueue_head(&mdr->waitq); - mdr->req.defer = idmap_defer; - ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); - if (ret == -EAGAIN) { - wait_event_interruptible_timeout(mdr->waitq, - test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); - ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); + retry: + ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle); + + if (ret == -ETIMEDOUT) { + struct ent *prev_item = *item; + *item = lookup_fn(key); + if (*item != prev_item) + goto retry; + cache_put(&(*item)->h, detail); } - put_mdr(mdr); return ret; } From 2ed5282cd9b44686a6e718269abb5c5cd332d8f1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:07 +1000 Subject: [PATCH 11/99] svcauth_gss: replace a trivial 'switch' with an 'if' Code like: switch(xxx) { case -error1: case -error2: .. return; case 0: stuff; } can more naturally be written: if (xxx < 0) return; stuff; Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 41 ++++++++++++++----------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index ed005af3ef5d..dec2a6fc7c12 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1034,30 +1034,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, rsi_free(&rsikey); if (!rsip) return SVC_CLOSE; - switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: - case -ETIMEDOUT: - case -ENOENT: + if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ return SVC_CLOSE; - case 0: - ret = SVC_CLOSE; - /* Got an answer to the upcall; use it: */ - if (gss_write_init_verf(rqstp, rsip)) - goto out; - if (resv->iov_len + 4 > PAGE_SIZE) - goto out; - svc_putnl(resv, RPC_SUCCESS); - if (svc_safe_putnetobj(resv, &rsip->out_handle)) - goto out; - if (resv->iov_len + 3 * 4 > PAGE_SIZE) - goto out; - svc_putnl(resv, rsip->major_status); - svc_putnl(resv, rsip->minor_status); - svc_putnl(resv, GSS_SEQ_WIN); - if (svc_safe_putnetobj(resv, &rsip->out_token)) - goto out; - } + + ret = SVC_CLOSE; + /* Got an answer to the upcall; use it: */ + if (gss_write_init_verf(rqstp, rsip)) + goto out; + if (resv->iov_len + 4 > PAGE_SIZE) + goto out; + svc_putnl(resv, RPC_SUCCESS); + if (svc_safe_putnetobj(resv, &rsip->out_handle)) + goto out; + if (resv->iov_len + 3 * 4 > PAGE_SIZE) + goto out; + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); + if (svc_safe_putnetobj(resv, &rsip->out_token)) + goto out; + ret = SVC_COMPLETE; out: cache_put(&rsip->h, &rsi_cache); From 1117449276bb909b029ed0b9ba13f53e4784db9d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:08 +1000 Subject: [PATCH 12/99] sunrpc/cache: change deferred-request hash table to use hlist. Being a hash table, hlist is the best option. 
There is currently some ugliness where we treat "->next == NULL" as a special case to avoid having to initialise the whole array. This change nicely gets rid of that case. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- net/sunrpc/cache.c | 28 ++++++++++------------------ 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 52a7d7224e90..03496357f455 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -133,7 +133,7 @@ struct cache_req { * delayed awaiting cache-fill */ struct cache_deferred_req { - struct list_head hash; /* on hash chain */ + struct hlist_node hash; /* on hash chain */ struct list_head recent; /* on fifo */ struct cache_head *item; /* cache item we wait on */ void *owner; /* we might need to discard all defered requests diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ca7c621cd975..2a8405194056 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -506,13 +506,13 @@ EXPORT_SYMBOL_GPL(cache_purge); static DEFINE_SPINLOCK(cache_defer_lock); static LIST_HEAD(cache_defer_list); -static struct list_head cache_defer_hash[DFR_HASHSIZE]; +static struct hlist_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; static void __unhash_deferred_req(struct cache_deferred_req *dreq) { list_del_init(&dreq->recent); - list_del_init(&dreq->hash); + hlist_del_init(&dreq->hash); cache_defer_cnt--; } @@ -521,9 +521,7 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he int hash = DFR_HASH(item); list_add(&dreq->recent, &cache_defer_list); - if (cache_defer_hash[hash].next == NULL) - INIT_LIST_HEAD(&cache_defer_hash[hash]); - list_add(&dreq->hash, &cache_defer_hash[hash]); + hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); } static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) @@ -588,7 +586,7 @@ static int
cache_wait_req(struct cache_req *req, struct cache_head *item) * to clean up */ spin_lock(&cache_defer_lock); - if (!list_empty(&sleeper.handle.hash)) { + if (!hlist_unhashed(&sleeper.handle.hash)) { __unhash_deferred_req(&sleeper.handle); spin_unlock(&cache_defer_lock); } else { @@ -642,24 +640,18 @@ static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - - struct list_head *lp; + struct hlist_node *lp, *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - lp = cache_defer_hash[hash].next; - if (lp) { - while (lp != &cache_defer_hash[hash]) { - dreq = list_entry(lp, struct cache_deferred_req, hash); - lp = lp->next; - if (dreq->item == item) { - __unhash_deferred_req(dreq); - list_add(&dreq->recent, &pending); - } + hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + if (dreq->item == item) { + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); } - } + spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { From e7f483eabea8ef6d2b5ce1b74c8184cc06819f15 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 21 Sep 2010 09:40:25 +0300 Subject: [PATCH 13/99] sunrpc/cache: don't use custom hex_to_bin() converter Signed-off-by: Andy Shevchenko Cc: Trond Myklebust Cc: linux-nfs@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2a8405194056..ac2c6e6abe65 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1179,13 +1179,19 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { - int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; - bp++; - byte <<= 4; - byte |= isdigit(*bp) ? 
*bp-'0' : toupper(*bp)-'A'+10; - *dest++ = byte; - bp++; + while (len < bufsize) { + int h, l; + + h = hex_to_bin(bp[0]); + if (h < 0) + break; + + l = hex_to_bin(bp[1]); + if (l < 0) + break; + + *dest++ = (h << 4) | l; + bp += 2; len++; } } else { From f904be9cc77f361d37d71468b13ff3d1a1823dea Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 21 Sep 2010 16:38:12 -0400 Subject: [PATCH 14/99] lockd: Mostly remove BKL from the server This patch removes all but one call to lock_kernel() from the server. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/lockd/svc4proc.c | 2 -- fs/lockd/svclock.c | 31 +++++++++++++++++++++---------- fs/lockd/svcproc.c | 2 -- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 031c6569a134..a336e832475d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) static void nlm4svc_callback_release(void *data) { - lock_kernel(); nlm_release_call(data); - unlock_kernel(); } static const struct rpc_call_ops nlm4svc_callback_ops = { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 84055d31bfc5..6f1ef000975a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops; * The list of blocked locks to retry */ static LIST_HEAD(nlm_blocked); +static DEFINE_SPINLOCK(nlm_blocked_lock); /* * Insert a blocked lock into the global list */ static void -nlmsvc_insert_block(struct nlm_block *block, unsigned long when) +nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when) { struct nlm_block *b; struct list_head *pos; @@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when) block->b_when = when; } +static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when) +{ + spin_lock(&nlm_blocked_lock); + nlmsvc_insert_block_locked(block, when); + 
spin_unlock(&nlm_blocked_lock); +} + /* * Remove a block from the global list */ @@ -94,7 +102,9 @@ static inline void nlmsvc_remove_block(struct nlm_block *block) { if (!list_empty(&block->b_list)) { + spin_lock(&nlm_blocked_lock); list_del_init(&block->b_list); + spin_unlock(&nlm_blocked_lock); nlmsvc_release_block(block); } } @@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, struct nlm_block *block; int rc = -ENOENT; - lock_kernel(); + spin_lock(&nlm_blocked_lock); list_for_each_entry(block, &nlm_blocked, b_list) { if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", @@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, } else if (result == 0) block->b_granted = 1; - nlmsvc_insert_block(block, 0); + nlmsvc_insert_block_locked(block, 0); svc_wake_up(block->b_daemon); rc = 0; break; } } - unlock_kernel(); + spin_unlock(&nlm_blocked_lock); if (rc == -ENOENT) printk(KERN_WARNING "lockd: grant for unknown block\n"); return rc; @@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); - lock_kernel(); + spin_lock(&nlm_blocked_lock); /* if the block is not on a list at this point then it has * been invalidated. Don't try to requeue it. * @@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) /* Call was successful, now wait for client callback */ timeout = 60 * HZ; } - nlmsvc_insert_block(block, timeout); + nlmsvc_insert_block_locked(block, timeout); svc_wake_up(block->b_daemon); out: - unlock_kernel(); + spin_unlock(&nlm_blocked_lock); } +/* + * FIXME: nlmsvc_release_block() grabs a mutex. 
This is not allowed for an + * .rpc_release rpc_call_op + */ static void nlmsvc_grant_release(void *data) { struct nlm_rqst *call = data; - - lock_kernel(); nlmsvc_release_block(call->a_block); - unlock_kernel(); } static const struct rpc_call_ops nlmsvc_grant_ops = { diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 0f2ab741ae7c..c3069f38d602 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) static void nlmsvc_callback_release(void *data) { - lock_kernel(); nlm_release_call(data); - unlock_kernel(); } static const struct rpc_call_ops nlmsvc_callback_ops = { From e95dffa4304186ad87963255f3e5e96b5c41849f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Sep 2010 12:55:06 +1000 Subject: [PATCH 15/99] sunrpc/cache: fix recent breakage of cache_clean_deferred commit 6610f720e9e8103c22d1f1ccf8fbb695550a571f broke cache_clean_deferred as entries are no longer added to the pending list for subsequent revisiting. So put those requests back on the pending list. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ac2c6e6abe65..ff733dfef3b8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -671,8 +671,10 @@ void cache_clean_deferred(void *owner) spin_lock(&cache_defer_lock); list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { - if (dreq->owner == owner) + if (dreq->owner == owner) { __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); + } } spin_unlock(&cache_defer_lock); From c67874f942e30039442d925b03793e0a46ddcddd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Sep 2010 12:55:07 +1000 Subject: [PATCH 16/99] nfsd: formally deprecate legacy nfsd syscall interface The syscall interface has been replaced by a more flexible interface since 2.6.0.
It is time to work towards discarding the old interface. So add an entry in feature-removal-schedule.txt and print a warning when the interface is used. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- Documentation/feature-removal-schedule.txt | 10 ++++++++++ fs/nfsd/nfsctl.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 842aa9de84a6..076a2c02adaf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -564,3 +564,13 @@ Who: FUJITA Tomonori ---------------------------- +What: access to nfsd auth cache through sys_nfsservctl or '.' files + in the 'nfsd' filesystem. +When: 2.6.40 +Why: This is a legacy interface which have been replaced by a more + dynamic cache. Continuing to maintain this interface is an + unnecessary burden. +Who: NeilBrown + +---------------------------- + diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b53b1d042f1f..7f0fc8861b85 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -121,6 +121,16 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { + static int warned; + if (file->f_dentry->d_name.name[0] == '.' && !warned) { + char name[sizeof(current->comm)]; + printk(KERN_INFO + "Warning: \"%s\" uses deprecated NFSD interface: %s." + " This will be removed in 2.6.40\n", + get_task_comm(name, current), + file->f_dentry->d_name.name); + warned = 1; + } if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return From 1e1405673e4e40a94ed7620553eb440a21040402 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Sep 2010 12:55:07 +1000 Subject: [PATCH 17/99] nfsd: allow deprecated interface to be compiled out. Add CONFIG_NFSD_DEPRECATED, default to y.
Only include deprecated interface if this is defined. This allows distros to remove this interface before the official removal, and allows developers to test without it. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/Makefile | 5 +---- fs/compat.c | 2 +- fs/nfsd/Kconfig | 12 ++++++++++++ fs/nfsd/export.c | 22 +++++++++++++++++++--- fs/nfsd/nfsctl.c | 10 ++++++++++ 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fs/Makefile b/fs/Makefile index e6ec1d309b1d..26956fcec917 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o - -nfsd-$(CONFIG_NFSD) := nfsctl.o -obj-y += $(nfsd-y) $(nfsd-m) - +obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o diff --git a/fs/compat.c b/fs/compat.c index 718c7062aec1..df5e671f0015 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } #endif /* HAVE_SET_RESTORE_SIGMASK */ -#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) +#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) /* Stuff for NFS server syscalls... */ struct compat_nfsctl_svc { u16 svc32_port; diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 4264377552e2..18b3e8975fe0 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -28,6 +28,18 @@ config NFSD If unsure, say N. +config NFSD_DEPRECATED + bool "Include support for deprecated syscall interface to NFSD" + depends on NFSD + default y + help + The syscall interface to nfsd was obsoleted in 2.6.0 by a new + filesystem based interface. The old interface is due for removal + in 2.6.40. If you wish to remove the interface before then + say N. + + In unsure, say Y. 
+ config NFSD_V2_ACL bool depends on NFSD diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index e56827b88fd2..a3c7d0ceb24f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -28,9 +28,6 @@ typedef struct auth_domain svc_client; typedef struct svc_export svc_export; -static void exp_do_unexport(svc_export *unexp); -static int exp_verify_string(char *cp, int max); - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) return ek; } +#ifdef CONFIG_NFSD_DEPRECATED static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, struct svc_export *exp) { @@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid) return exp_find_key(clp, FSID_NUM, fsidv, NULL); } +#endif static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, struct cache_req *reqp) @@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) return exp; } +#ifdef CONFIG_NFSD_DEPRECATED /* * Hashtable locking. Write locks are placed only by user processes * wanting to modify export information. @@ -925,6 +925,19 @@ exp_writeunlock(void) { up_write(&hash_sem); } +#else + +/* hash_sem not needed once deprecated interface is removed */ +void exp_readlock(void) {} +static inline void exp_writelock(void){} +void exp_readunlock(void) {} +static inline void exp_writeunlock(void){} + +#endif + +#ifdef CONFIG_NFSD_DEPRECATED +static void exp_do_unexport(svc_export *unexp); +static int exp_verify_string(char *cp, int max); static void exp_fsid_unhash(struct svc_export *exp) { @@ -1147,6 +1160,7 @@ exp_unexport(struct nfsctl_export *nxp) exp_writeunlock(); return err; } +#endif /* CONFIG_NFSD_DEPRECATED */ /* * Obtain the root fh on behalf of a client. @@ -1529,6 +1543,7 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; +#ifdef CONFIG_NFSD_DEPRECATED /* * Add or modify a client. 
* Change requests may involve the list of host addresses. The list of @@ -1618,6 +1633,7 @@ exp_verify_string(char *cp, int max) printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); return 0; } +#endif /* CONFIG_NFSD_DEPRECATED */ /* * Initialize the exports module. diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7f0fc8861b85..b278e444e2f4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -22,6 +22,7 @@ */ enum { NFSD_Root = 1, +#ifdef CONFIG_NFSD_DEPRECATED NFSD_Svc, NFSD_Add, NFSD_Del, @@ -29,6 +30,7 @@ enum { NFSD_Unexport, NFSD_Getfd, NFSD_Getfs, +#endif NFSD_List, NFSD_Export_features, NFSD_Fh, @@ -54,6 +56,7 @@ enum { /* * write() for these nodes. */ +#ifdef CONFIG_NFSD_DEPRECATED static ssize_t write_svc(struct file *file, char *buf, size_t size); static ssize_t write_add(struct file *file, char *buf, size_t size); static ssize_t write_del(struct file *file, char *buf, size_t size); @@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size); static ssize_t write_unexport(struct file *file, char *buf, size_t size); static ssize_t write_getfd(struct file *file, char *buf, size_t size); static ssize_t write_getfs(struct file *file, char *buf, size_t size); +#endif static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); @@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { +#ifdef CONFIG_NFSD_DEPRECATED [NFSD_Svc] = write_svc, [NFSD_Add] = write_add, [NFSD_Del] = write_del, @@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Unexport] = write_unexport, [NFSD_Getfd] = write_getfd, [NFSD_Getfs] = write_getfs, +#endif [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, 
[NFSD_FO_UnlockFS] = write_unlock_fs, @@ -196,6 +202,7 @@ static const struct file_operations pool_stats_operations = { * payload - write methods */ +#ifdef CONFIG_NFSD_DEPRECATED /** * write_svc - Start kernel's NFSD server * @@ -491,6 +498,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) out: return err; } +#endif /* CONFIG_NFSD_DEPRECATED */ /** * write_unlock_ip - Release all locks used by a client @@ -1365,6 +1373,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { +#ifdef CONFIG_NFSD_DEPRECATED [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, @@ -1372,6 +1381,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, From 049ef27b224ecc33958465fef83d5e4e8a056115 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 23 Sep 2010 18:26:58 +0400 Subject: [PATCH 18/99] nfsd: Export get_task_comm for nfsd The git://linux-nfs.org/~bfields/linux.git nfsd-next branch doesn't compile when nfsd is a module with the following error: ERROR: "get_task_comm" [fs/nfsd/nfsd.ko] undefined! Replace the get_task_comm call with direct comm access, which is safe for current. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b278e444e2f4..7b2fa1d25af7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -129,12 +129,10 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size { static int warned; if (file->f_dentry->d_name.name[0] == '.' && !warned) { - char name[sizeof(current->comm)]; printk(KERN_INFO "Warning: \"%s\" uses deprecated NFSD interface: %s." " This will be removed in 2.6.40\n", - get_task_comm(name, current), - file->f_dentry->d_name.name); + current->comm, file->f_dentry->d_name.name); warned = 1; } if (! file->private_data) { From 74ec1e1269eba65b5f8e810cf0363ddb7aa64de5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 Sep 2010 17:43:59 -0400 Subject: [PATCH 19/99] nfsd: fix /proc/net/rpc/nfsd.export/content display Note with "first" always 0, and "lastflags" initially 0, we always dump a spurious set of 0 flags at the start, among other problems. Fix. And attempt to make the code a little more obvious. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a3c7d0ceb24f..067e2e612e2d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1470,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags) show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); } +static bool secinfo_flags_equal(int f, int g) +{ + f &= NFSEXP_SECINFO_FLAGS; + g &= NFSEXP_SECINFO_FLAGS; + return f == g; +} + +static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end) +{ + int flags; + + flags = (*fp)->flags; + seq_printf(m, ",sec=%d", (*fp)->pseudoflavor); + (*fp)++; + while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) { + seq_printf(m, ":%d", (*fp)->pseudoflavor); + (*fp)++; + } + return flags; +} + static void show_secinfo(struct seq_file *m, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; - int lastflags = 0, first = 0; + int flags; if (exp->ex_nflavors == 0) return; - for (f = exp->ex_flavors; f < end; f++) { - if (first || f->flags != lastflags) { - if (!first) - show_secinfo_flags(m, lastflags); - seq_printf(m, ",sec=%d", f->pseudoflavor); - lastflags = f->flags; - } else { - seq_printf(m, ":%d", f->pseudoflavor); - } + f = exp->ex_flavors; + flags = show_secinfo_run(m, &f, end); + if (!secinfo_flags_equal(flags, exp->ex_flags)) + show_secinfo_flags(m, flags); + while (f != end) { + flags = show_secinfo_run(m, &f, end); + show_secinfo_flags(m, flags); } - show_secinfo_flags(m, lastflags); } static void exp_flags(struct seq_file *m, int flag, int fsid, From bf18ab32ff2a50a3d13d559f26f94ecfba131f24 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 13:57:36 +0400 Subject: [PATCH 20/99] sunrpc: Pass the ip_map_parse's cd to lower calls The target is to have many ip_map_cache-s in the system. 
This particular patch handles its usage by the ip_map_parse callback. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index e91b550bc836..31b99c599e7e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -178,8 +178,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); } -static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); -static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); +static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); +static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd, dom = NULL; /* IPv6 scope IDs are ignored for now */ - ipmp = ip_map_lookup(class, &sin6.sin6_addr); + ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr); if (ipmp) { - err = ip_map_update(ipmp, + err = __ip_map_update(cd, ipmp, container_of(dom, struct unix_domain, h), expiry); } else @@ -309,14 +309,15 @@ struct cache_detail ip_map_cache = { .alloc = ip_map_alloc, }; -static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) +static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, + struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); ipv6_addr_copy(&ip.m_addr, addr); - ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, + ch = sunrpc_cache_lookup(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ hash_ip6(*addr)); @@ -326,7 +327,13 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) return NULL; } -static int 
ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) +static inline struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) +{ + return __ip_map_lookup(&ip_map_cache, class, addr); +} + +static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, + struct unix_domain *udom, time_t expiry) { struct ip_map ip; struct cache_head *ch; @@ -344,16 +351,20 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ip.m_add_change++; } ip.h.expiry_time = expiry; - ch = sunrpc_cache_update(&ip_map_cache, - &ip.h, &ipm->h, + ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ hash_ip6(ipm->m_addr)); if (!ch) return -ENOMEM; - cache_put(ch, &ip_map_cache); + cache_put(ch, cd); return 0; } +static inline int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) +{ + return __ip_map_update(&ip_map_cache, ipm, udom, expiry); +} + int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; From e3bfca01c1ad378deaee598292bcc7ee19024563 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 13:58:42 +0400 Subject: [PATCH 21/99] sunrpc: Make xprt auth cache release work with the xprt This is done in order to facilitate getting the ip_map_cache from which to put the ip_map. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svcauth.h | 3 ++- net/sunrpc/svc_xprt.c | 5 ++--- net/sunrpc/svcauth_unix.c | 9 ++++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 11266935e2d6..18bce95255a4 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -116,6 +116,7 @@ struct auth_ops { #define SVC_PENDING 9 #define SVC_COMPLETE 10 +struct svc_xprt; extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); extern int svc_authorise(struct svc_rqst *rqstp); @@ -131,7 +132,7 @@ extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); -extern void svcauth_unix_info_release(void *); +extern void svcauth_unix_info_release(struct svc_xprt *xpt); extern int svcauth_unix_set_client(struct svc_rqst *rqstp); static inline unsigned long hash_str(char *name, int bits) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 95fc3e8c51d6..385d822419ca 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -128,9 +128,8 @@ static void svc_xprt_free(struct kref *kref) struct svc_xprt *xprt = container_of(kref, struct svc_xprt, xpt_ref); struct module *owner = xprt->xpt_class->xcl_owner; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && - xprt->xpt_auth_cache != NULL) - svcauth_unix_info_release(xprt->xpt_auth_cache); + if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) + svcauth_unix_info_release(xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 31b99c599e7e..49e39ff22910 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -472,10 +472,13 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) } void -svcauth_unix_info_release(void *info) 
+svcauth_unix_info_release(struct svc_xprt *xpt) { - struct ip_map *ipm = info; - cache_put(&ipm->h, &ip_map_cache); + struct ip_map *ipm; + + ipm = xpt->xpt_auth_cache; + if (ipm != NULL) + cache_put(&ipm->h, &ip_map_cache); } /**************************************************************************** From 3be4479fdf2fb7eb17a4592917ae4b536058b0c7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 13:59:13 +0400 Subject: [PATCH 22/99] sunrpc: Pass xprt to cached get/put routines They do not require the rqst actually and having the xprt simplifies further patching. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 49e39ff22910..f4751805ecfe 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -426,10 +426,9 @@ void svcauth_unix_purge(void) EXPORT_SYMBOL_GPL(svcauth_unix_purge); static inline struct ip_map * -ip_map_cached_get(struct svc_rqst *rqstp) +ip_map_cached_get(struct svc_xprt *xprt) { struct ip_map *ipm = NULL; - struct svc_xprt *xprt = rqstp->rq_xprt; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); @@ -454,10 +453,8 @@ ip_map_cached_get(struct svc_rqst *rqstp) } static inline void -ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) +ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm) { - struct svc_xprt *xprt = rqstp->rq_xprt; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); if (xprt->xpt_auth_cache == NULL) { @@ -707,6 +704,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) struct ip_map *ipm; struct group_info *gi; struct svc_cred *cred = &rqstp->rq_cred; + struct svc_xprt *xprt = rqstp->rq_xprt; switch (rqstp->rq_addr.ss_family) { case AF_INET: @@ -725,7 +723,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) if (rqstp->rq_proc == 0) return SVC_OK; - 
ipm = ip_map_cached_get(rqstp); + ipm = ip_map_cached_get(xprt); if (ipm == NULL) ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, &sin6->sin6_addr); @@ -745,7 +743,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - ip_map_cached_put(rqstp, ipm); + ip_map_cached_put(xprt, ipm); break; } From 352114f395bd79353faf0bc1506ead94de393f55 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 13:59:48 +0400 Subject: [PATCH 23/99] sunrpc: Add net to pure API calls There are two calls that operate on ip_map_cache and are directly called from the nfsd code. Other places will be handled in a different way. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 2 +- fs/nfsd/nfsctl.c | 4 ++-- include/linux/sunrpc/svcauth.h | 4 ++-- net/sunrpc/svcauth_unix.c | 18 ++++++++++-------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 067e2e612e2d..c0fcb7ab7f6d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1593,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp) /* Insert client into hashtable. 
*/ for (i = 0; i < ncp->cl_naddr; i++) { ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); - auth_unix_add_addr(&addr6, dom); + auth_unix_add_addr(&init_net, &addr6, dom); } auth_unix_forget_old(dom); auth_domain_put(dom); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7b2fa1d25af7..b6e192d25633 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -416,7 +416,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - clp = auth_unix_lookup(&in6); + clp = auth_unix_lookup(&init_net, &in6); if (!clp) err = -EPERM; else { @@ -479,7 +479,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - clp = auth_unix_lookup(&in6); + clp = auth_unix_lookup(&init_net, &in6); if (!clp) err = -EPERM; else { diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 18bce95255a4..25d333c1b571 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -126,10 +126,10 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom); +extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); +extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(struct svc_xprt *xpt); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f4751805ecfe..2a76c7cf603e 100644 --- 
a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -327,7 +327,8 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, return NULL; } -static inline struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) +static inline struct ip_map *ip_map_lookup(struct net *net, char *class, + struct in6_addr *addr) { return __ip_map_lookup(&ip_map_cache, class, addr); } @@ -360,12 +361,13 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, return 0; } -static inline int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) +static inline int ip_map_update(struct net *net, struct ip_map *ipm, + struct unix_domain *udom, time_t expiry) { return __ip_map_update(&ip_map_cache, ipm, udom, expiry); } -int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) +int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; struct ip_map *ipmp; @@ -373,10 +375,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); - ipmp = ip_map_lookup("nfsd", addr); + ipmp = ip_map_lookup(net, "nfsd", addr); if (ipmp) - return ip_map_update(ipmp, udom, NEVER); + return ip_map_update(net, ipmp, udom, NEVER); else return -ENOMEM; } @@ -394,12 +396,12 @@ int auth_unix_forget_old(struct auth_domain *dom) } EXPORT_SYMBOL_GPL(auth_unix_forget_old); -struct auth_domain *auth_unix_lookup(struct in6_addr *addr) +struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; - ipm = ip_map_lookup("nfsd", addr); + ipm = ip_map_lookup(net, "nfsd", addr); if (!ipm) return NULL; @@ -725,7 +727,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ipm = ip_map_cached_get(xprt); if (ipm == NULL) - ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, + ipm = 
ip_map_lookup(&init_net, rqstp->rq_server->sv_program->pg_class, &sin6->sin6_addr); if (ipm == NULL) From 593ce16b943ea37d4ec62c377b32d7f3f4085e84 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 14:00:15 +0400 Subject: [PATCH 24/99] sunrpc: Add routines that allow registering per-net caches Existing calls do the same, but for the init_net. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 ++ net/sunrpc/cache.c | 27 +++++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03496357f455..6950c981882d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -197,7 +197,9 @@ extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) extern void __init cache_initialize(void); extern int cache_register(struct cache_detail *cd); +extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister(struct cache_detail *cd); +extern void cache_unregister_net(struct cache_detail *cd, struct net *net); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, mode_t, struct cache_detail *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ff733dfef3b8..e84e7ddeecd4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,6 +34,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1537,7 +1538,7 @@ static const struct file_operations cache_flush_operations_procfs = { .release = release_flush_procfs, }; -static void remove_cache_proc_entries(struct cache_detail *cd) +static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) { if (cd->u.procfs.proc_ent == NULL) return; @@ -1552,7 +1553,7 @@ static void remove_cache_proc_entries(struct cache_detail *cd) } #ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) +static int 
create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; @@ -1587,11 +1588,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } return 0; out_nomem: - remove_cache_proc_entries(cd); + remove_cache_proc_entries(cd, net); return -ENOMEM; } #else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) +static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { return 0; } @@ -1602,22 +1603,32 @@ void __init cache_initialize(void) INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); } -int cache_register(struct cache_detail *cd) +int cache_register_net(struct cache_detail *cd, struct net *net) { int ret; sunrpc_init_cache_detail(cd); - ret = create_cache_proc_entries(cd); + ret = create_cache_proc_entries(cd, net); if (ret) sunrpc_destroy_cache_detail(cd); return ret; } + +int cache_register(struct cache_detail *cd) +{ + return cache_register_net(cd, &init_net); +} EXPORT_SYMBOL_GPL(cache_register); +void cache_unregister_net(struct cache_detail *cd, struct net *net) +{ + remove_cache_proc_entries(cd, net); + sunrpc_destroy_cache_detail(cd); +} + void cache_unregister(struct cache_detail *cd) { - remove_cache_proc_entries(cd); - sunrpc_destroy_cache_detail(cd); + cache_unregister_net(cd, &init_net); } EXPORT_SYMBOL_GPL(cache_unregister); From 4fb8518bdac8e85f6580ea3f586adf396cd472bc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 14:00:49 +0400 Subject: [PATCH 25/99] sunrpc: Tag svc_xprt with net The transport representation should be per-net of course. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 2 ++ net/sunrpc/svc_xprt.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5f4e18b3ce73..e50e3eca1c7c 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,8 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ + + struct net *xpt_net; }; int svc_reg_xprt_class(struct svc_xprt_class *); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 385d822419ca..f7e8915051b1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -130,6 +130,7 @@ static void svc_xprt_free(struct kref *kref) struct module *owner = xprt->xpt_class->xcl_owner; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); + put_net(xprt->xpt_net); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } @@ -159,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); + xprt->xpt_net = get_net(&init_net); } EXPORT_SYMBOL_GPL(svc_xprt_init); From 2f72c9b73730c335381b13e2bd221abe1acea394 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 14:01:27 +0400 Subject: [PATCH 26/99] sunrpc: The per-net skeleton Register empty per-net operations for the sunrpc layer. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- net/sunrpc/netns.h | 12 ++++++++++++ net/sunrpc/sunrpc_syms.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 net/sunrpc/netns.h diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h new file mode 100644 index 000000000000..b2d18af2815e --- /dev/null +++ b/net/sunrpc/netns.h @@ -0,0 +1,12 @@ +#ifndef __SUNRPC_NETNS_H__ +#define __SUNRPC_NETNS_H__ + +#include +#include + +struct sunrpc_net { +}; + +extern int sunrpc_net_id; + +#endif diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index c0d085013a2b..faa23229bd25 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -22,6 +22,26 @@ #include #include +#include "netns.h" + +int sunrpc_net_id; + +static __net_init int sunrpc_init_net(struct net *net) +{ + return 0; +} + +static __net_exit void sunrpc_exit_net(struct net *net) +{ +} + +static struct pernet_operations sunrpc_net_ops = { + .init = sunrpc_init_net, + .exit = sunrpc_exit_net, + .id = &sunrpc_net_id, + .size = sizeof(struct sunrpc_net), +}; + extern struct cache_detail ip_map_cache, unix_gid_cache; extern void cleanup_rpcb_clnt(void); @@ -38,18 +58,26 @@ init_sunrpc(void) err = rpcauth_init_module(); if (err) goto out3; + + cache_initialize(); + + err = register_pernet_subsys(&sunrpc_net_ops); + if (err) + goto out4; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif - cache_initialize(); cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; + +out4: + rpcauth_remove_module(); out3: rpc_destroy_mempool(); out2: @@ -69,6 +97,7 @@ cleanup_sunrpc(void) rpc_destroy_mempool(); cache_unregister(&ip_map_cache); cache_unregister(&unix_gid_cache); + unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif From 4f42d0d53ca4737f82937edb0efc83564c124853 Mon Sep 17 00:00:00 2001 
From: Pavel Emelyanov Date: Mon, 27 Sep 2010 14:01:58 +0400 Subject: [PATCH 27/99] sunrpc: Make the /proc/net/rpc appear in net namespaces Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/stats.h | 23 ++++++++++++------- net/sunrpc/cache.c | 11 ++++++--- net/sunrpc/netns.h | 1 + net/sunrpc/stats.c | 43 +++++++++++++++++++++--------------- net/sunrpc/sunrpc_syms.c | 16 +++++++++----- 5 files changed, 59 insertions(+), 35 deletions(-) diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 5fa0f2084307..680471d1f28a 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -38,8 +38,21 @@ struct svc_stat { rpcbadclnt; }; -void rpc_proc_init(void); -void rpc_proc_exit(void); +struct net; +#ifdef CONFIG_PROC_FS +int rpc_proc_init(struct net *); +void rpc_proc_exit(struct net *); +#else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} + +static inline void rpc_proc_exit(struct net *net) +{ +} +#endif + #ifdef MODULE void rpc_modcount(struct inode *, int); #endif @@ -54,9 +67,6 @@ void svc_proc_unregister(const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); - -extern struct proc_dir_entry *proc_net_rpc; - #else static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } @@ -69,9 +79,6 @@ static inline void svc_proc_unregister(const char *p) {} static inline void svc_seq_show(struct seq_file *seq, const struct svc_stat *st) {} - -#define proc_net_rpc NULL - #endif #endif /* _LINUX_SUNRPC_STATS_H */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e84e7ddeecd4..e20968aac68a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include "netns.h" #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1540,6 +1540,8 @@ static const struct file_operations cache_flush_operations_procfs = { static void remove_cache_proc_entries(struct cache_detail *cd, struct net 
*net) { + struct sunrpc_net *sn; + if (cd->u.procfs.proc_ent == NULL) return; if (cd->u.procfs.flush_ent) @@ -1549,15 +1551,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) if (cd->u.procfs.content_ent) remove_proc_entry("content", cd->u.procfs.proc_ent); cd->u.procfs.proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); + sn = net_generic(net, sunrpc_net_id); + remove_proc_entry(cd->name, sn->proc_net_rpc); } #ifdef CONFIG_PROC_FS static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; + struct sunrpc_net *sn; - cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + sn = net_generic(net, sunrpc_net_id); + cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->u.procfs.proc_ent == NULL) goto out_nomem; cd->u.procfs.channel_ent = NULL; diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index b2d18af2815e..e52ce897dde5 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -5,6 +5,7 @@ #include struct sunrpc_net { + struct proc_dir_entry *proc_net_rpc; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index ea1046f3f9a3..f71a73107ae9 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -22,12 +22,11 @@ #include #include #include -#include + +#include "netns.h" #define RPCDBG_FACILITY RPCDBG_MISC -struct proc_dir_entry *proc_net_rpc = NULL; - /* * Get RPC client stats */ @@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); static inline struct proc_dir_entry * do_register(const char *name, void *data, const struct file_operations *fops) { - rpc_proc_init(); - dprintk("RPC: registering /proc/net/rpc/%s\n", name); + struct sunrpc_net *sn; - return proc_create_data(name, 0, proc_net_rpc, fops, data); + dprintk("RPC: registering /proc/net/rpc/%s\n", name); + sn = net_generic(&init_net, sunrpc_net_id); + return proc_create_data(name, 0, sn->proc_net_rpc, fops, data); } struct proc_dir_entry * @@ -234,7 
+234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register); void rpc_proc_unregister(const char *name) { - remove_proc_entry(name, proc_net_rpc); + struct sunrpc_net *sn; + + sn = net_generic(&init_net, sunrpc_net_id); + remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); @@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register); void svc_proc_unregister(const char *name) { - remove_proc_entry(name, proc_net_rpc); + struct sunrpc_net *sn; + + sn = net_generic(&init_net, sunrpc_net_id); + remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); -void -rpc_proc_init(void) +int rpc_proc_init(struct net *net) { + struct sunrpc_net *sn; + dprintk("RPC: registering /proc/net/rpc\n"); - if (!proc_net_rpc) - proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); + sn = net_generic(net, sunrpc_net_id); + sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net); + if (sn->proc_net_rpc == NULL) + return -ENOMEM; + + return 0; } -void -rpc_proc_exit(void) +void rpc_proc_exit(struct net *net) { dprintk("RPC: unregistering /proc/net/rpc\n"); - if (proc_net_rpc) { - proc_net_rpc = NULL; - remove_proc_entry("rpc", init_net.proc_net); - } + remove_proc_entry("rpc", net->proc_net); } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index faa23229bd25..c076af8535db 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -28,11 +28,21 @@ int sunrpc_net_id; static __net_init int sunrpc_init_net(struct net *net) { + int err; + + err = rpc_proc_init(net); + if (err) + goto err_proc; + return 0; + +err_proc: + return err; } static __net_exit void sunrpc_exit_net(struct net *net) { + rpc_proc_exit(net); } static struct pernet_operations sunrpc_net_ops = { @@ -66,9 +76,6 @@ init_sunrpc(void) goto out4; #ifdef RPC_DEBUG rpc_register_sysctl(); -#endif -#ifdef CONFIG_PROC_FS - rpc_proc_init(); #endif cache_register(&ip_map_cache); cache_register(&unix_gid_cache); @@ -100,9 +107,6 @@ cleanup_sunrpc(void) 
unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); -#endif -#ifdef CONFIG_PROC_FS - rpc_proc_exit(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ } From 90d51b02fd702d969eb05bd9d4ecc954759fbe23 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 27 Sep 2010 14:02:29 +0400 Subject: [PATCH 28/99] sunrpc: Make the ip_map_cache be per-net Everything that is required for that already exists: * the per-net cache registration with respective proc entries * the context (struct net) is available in all the users Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- net/sunrpc/netns.h | 6 ++ net/sunrpc/sunrpc_syms.c | 11 +++- net/sunrpc/svcauth_unix.c | 122 +++++++++++++++++++++++++++++--------- 3 files changed, 108 insertions(+), 31 deletions(-) diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index e52ce897dde5..d013bf211cae 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -4,10 +4,16 @@ #include #include +struct cache_detail; + struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; + struct cache_detail *ip_map_cache; }; extern int sunrpc_net_id; +int ip_map_cache_create(struct net *); +void ip_map_cache_destroy(struct net *); + #endif diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index c076af8535db..9d0809160994 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -34,14 +34,21 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_proc; + err = ip_map_cache_create(net); + if (err) + goto err_ipmap; + return 0; +err_ipmap: + rpc_proc_exit(net); err_proc: return err; } static __net_exit void sunrpc_exit_net(struct net *net) { + ip_map_cache_destroy(net); rpc_proc_exit(net); } @@ -52,7 +59,7 @@ static struct pernet_operations sunrpc_net_ops = { .size = sizeof(struct sunrpc_net), }; -extern struct cache_detail ip_map_cache, unix_gid_cache; +extern struct cache_detail unix_gid_cache; extern void cleanup_rpcb_clnt(void); @@ -77,7 
+84,6 @@ init_sunrpc(void) #ifdef RPC_DEBUG rpc_register_sysctl(); #endif - cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ @@ -102,7 +108,6 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&ip_map_cache); cache_unregister(&unix_gid_cache); unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2a76c7cf603e..8b378f91f255 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -18,6 +18,8 @@ #include +#include "netns.h" + /* * AUTHUNIX and AUTHNULL credentials are both handled here. * AUTHNULL is treated just like AUTHUNIX except that the uid/gid @@ -92,7 +94,6 @@ struct ip_map { struct unix_domain *m_client; int m_add_change; }; -static struct cache_head *ip_table[IP_HASHMAX]; static void ip_map_put(struct kref *kref) { @@ -294,21 +295,6 @@ static int ip_map_show(struct seq_file *m, } -struct cache_detail ip_map_cache = { - .owner = THIS_MODULE, - .hash_size = IP_HASHMAX, - .hash_table = ip_table, - .name = "auth.unix.ip", - .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, - .cache_parse = ip_map_parse, - .cache_show = ip_map_show, - .match = ip_map_match, - .init = ip_map_init, - .update = update, - .alloc = ip_map_alloc, -}; - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr) { @@ -330,7 +316,10 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, static inline struct ip_map *ip_map_lookup(struct net *net, char *class, struct in6_addr *addr) { - return __ip_map_lookup(&ip_map_cache, class, addr); + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + return __ip_map_lookup(sn->ip_map_cache, class, addr); } static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, @@ -364,7 +353,10 @@ 
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, static inline int ip_map_update(struct net *net, struct ip_map *ipm, struct unix_domain *udom, time_t expiry) { - return __ip_map_update(&ip_map_cache, ipm, udom, expiry); + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); } int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) @@ -400,12 +392,14 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; + struct sunrpc_net *sn; + sn = net_generic(net, sunrpc_net_id); ipm = ip_map_lookup(net, "nfsd", addr); if (!ipm) return NULL; - if (cache_check(&ip_map_cache, &ipm->h, NULL)) + if (cache_check(sn->ip_map_cache, &ipm->h, NULL)) return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { @@ -416,14 +410,21 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) rv = &ipm->m_client->h; kref_get(&rv->ref); } - cache_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, sn->ip_map_cache); return rv; } EXPORT_SYMBOL_GPL(auth_unix_lookup); void svcauth_unix_purge(void) { - cache_purge(&ip_map_cache); + struct net *net; + + for_each_net(net) { + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + cache_purge(sn->ip_map_cache); + } } EXPORT_SYMBOL_GPL(svcauth_unix_purge); @@ -431,6 +432,7 @@ static inline struct ip_map * ip_map_cached_get(struct svc_xprt *xprt) { struct ip_map *ipm = NULL; + struct sunrpc_net *sn; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); @@ -442,9 +444,10 @@ ip_map_cached_get(struct svc_xprt *xprt) * remembered, e.g. by a second mount from the * same IP address. 
*/ + sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); - cache_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, sn->ip_map_cache); return NULL; } cache_get(&ipm->h); @@ -466,8 +469,12 @@ ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm) } spin_unlock(&xprt->xpt_lock); } - if (ipm) - cache_put(&ipm->h, &ip_map_cache); + if (ipm) { + struct sunrpc_net *sn; + + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + cache_put(&ipm->h, sn->ip_map_cache); + } } void @@ -476,8 +483,12 @@ svcauth_unix_info_release(struct svc_xprt *xpt) struct ip_map *ipm; ipm = xpt->xpt_auth_cache; - if (ipm != NULL) - cache_put(&ipm->h, &ip_map_cache); + if (ipm != NULL) { + struct sunrpc_net *sn; + + sn = net_generic(xpt->xpt_net, sunrpc_net_id); + cache_put(&ipm->h, sn->ip_map_cache); + } } /**************************************************************************** @@ -707,6 +718,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) struct group_info *gi; struct svc_cred *cred = &rqstp->rq_cred; struct svc_xprt *xprt = rqstp->rq_xprt; + struct net *net = xprt->xpt_net; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); switch (rqstp->rq_addr.ss_family) { case AF_INET: @@ -727,13 +740,13 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ipm = ip_map_cached_get(xprt); if (ipm == NULL) - ipm = ip_map_lookup(&init_net, rqstp->rq_server->sv_program->pg_class, + ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class, &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { + switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { default: BUG(); case -ETIMEDOUT: @@ -905,3 +918,56 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; +int ip_map_cache_create(struct net *net) +{ + int err = -ENOMEM; + struct cache_detail *cd; + struct cache_head **tbl; + struct sunrpc_net *sn = net_generic(net, 
sunrpc_net_id); + + cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + goto err_cd; + + tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL); + if (tbl == NULL) + goto err_tbl; + + cd->owner = THIS_MODULE, + cd->hash_size = IP_HASHMAX, + cd->hash_table = tbl, + cd->name = "auth.unix.ip", + cd->cache_put = ip_map_put, + cd->cache_upcall = ip_map_upcall, + cd->cache_parse = ip_map_parse, + cd->cache_show = ip_map_show, + cd->match = ip_map_match, + cd->init = ip_map_init, + cd->update = update, + cd->alloc = ip_map_alloc, + + err = cache_register_net(cd, net); + if (err) + goto err_reg; + + sn->ip_map_cache = cd; + return 0; + +err_reg: + kfree(tbl); +err_tbl: + kfree(cd); +err_cd: + return err; +} + +void ip_map_cache_destroy(struct net *net) +{ + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + cache_purge(sn->ip_map_cache); + cache_unregister_net(sn->ip_map_cache, net); + kfree(sn->ip_map_cache->hash_table); + kfree(sn->ip_map_cache); +} From c135e84afb6bcec9cb8ef0492fa4867efbfaad91 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 29 Sep 2010 14:16:57 +1000 Subject: [PATCH 29/99] sunrpc: fix up rpcauth_remove_module section mismatch On Wed, 29 Sep 2010 14:02:38 +1000 Stephen Rothwell wrote: > > After merging the final tree, today's linux-next build (powerpc > ppc44x_defconfig) produced this warning: > > WARNING: net/sunrpc/sunrpc.o(.init.text+0x110): Section mismatch in reference from the function init_sunrpc() to the function .exit.text:rpcauth_remove_module() > The function __init init_sunrpc() references > a function __exit rpcauth_remove_module(). > This is often seen when error handling in the init function > uses functionality in the exit path. > The fix is often to remove the __exit annotation of > rpcauth_remove_module() so it may be used outside an exit section. > > Probably caused by commit 2f72c9b73730c335381b13e2bd221abe1acea394 > ("sunrpc: The per-net skeleton").
This actually causes a build failure on a sparc32 defconfig build: `rpcauth_remove_module' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o I applied the following patch for today: Fixes: `rpcauth_remove_module' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o Signed-off-by: Stephen Rothwell Acked-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/auth.h | 4 ++-- net/sunrpc/auth.c | 2 +- net/sunrpc/auth_generic.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5bbc447175dc..b2024757edd5 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -122,8 +122,8 @@ extern const struct rpc_authops authnull_ops; int __init rpc_init_authunix(void); int __init rpc_init_generic_auth(void); int __init rpcauth_init_module(void); -void __exit rpcauth_remove_module(void); -void __exit rpc_destroy_generic_auth(void); +void rpcauth_remove_module(void); +void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index e9eaaf7d43c1..2c0d9e6093b8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -658,7 +658,7 @@ int __init rpcauth_init_module(void) return err; } -void __exit rpcauth_remove_module(void) +void rpcauth_remove_module(void) { rpc_destroy_authunix(); rpc_destroy_generic_auth(); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 43162bb3b78f..e010a015d996 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void) return rpcauth_init_credcache(&generic_auth); } -void __exit rpc_destroy_generic_auth(void) +void rpc_destroy_generic_auth(void) { rpcauth_destroy_credcache(&generic_auth); } From 
2b44f1ba40914777f4b1075254ba97663d4e2574 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 30 Sep 2010 20:47:46 +0200 Subject: [PATCH 30/99] nfsd4: adjust buflen for encoded attrs bitmap based on actual bitmap length The existing code adjusted it based on the worst case scenario for the returned bitmap and the best case scenario for the supported attrs attribute. Signed-off-by: Benny Halevy [bfields@redhat.com: removed likely/unlikely's] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1a468bbd330f..f35a94a04026 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } } - if ((buflen -= 16) < 0) - goto out_resource; - if (unlikely(bmval2)) { + if (bmval2) { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(bmval0); WRITE32(bmval1); WRITE32(bmval2); - } else if (likely(bmval1)) { + } else if (bmval1) { + if ((buflen -= 12) < 0) + goto out_resource; WRITE32(2); WRITE32(bmval0); WRITE32(bmval1); } else { + if ((buflen -= 8) < 0) + goto out_resource; WRITE32(1); WRITE32(bmval0); } @@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, u32 word1 = nfsd_suppattrs1(minorversion); u32 word2 = nfsd_suppattrs2(minorversion); - if ((buflen -= 12) < 0) - goto out_resource; if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; if (!word2) { + if ((buflen -= 12) < 0) + goto out_resource; WRITE32(2); WRITE32(word0); WRITE32(word1); } else { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(word0); WRITE32(word1); From bd1722d4316e42a12fe6337ebe34d7e1e2c088b2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:02:43 +0400 Subject: [PATCH 31/99] sunrpc: Factor out rpc_xprt allocation Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 22 ++++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 13 ++----------- net/sunrpc/xprtsock.c | 15 +++------------ 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ff5a77b28c50..00f6e3fe2900 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -280,6 +280,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); +struct rpc_xprt * xprt_alloc(int size, int max_req); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 970fb00f388c..26cbe219388b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -962,6 +962,28 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } +struct rpc_xprt *xprt_alloc(int size, int max_req) +{ + struct rpc_xprt *xprt; + + xprt = kzalloc(size, GFP_KERNEL); + if (xprt == NULL) + goto out; + + xprt->max_reqs = max_req; + xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) + goto out_free; + + return xprt; + +out_free: + kfree(xprt); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(xprt_alloc); + /** * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a85e866a77f7..9d77bf25829f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -285,23 +285,14 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(-EBADF); } - xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + xprt = xprt_alloc(sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries); if (xprt == NULL) { 
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); return ERR_PTR(-ENOMEM); } - xprt->max_reqs = xprt_rdma_slot_table_entries; - xprt->slot = kcalloc(xprt->max_reqs, - sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - dprintk("RPC: %s: couldn't allocate %d slots\n", - __func__, xprt->max_reqs); - kfree(xprt); - return ERR_PTR(-ENOMEM); - } - /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; xprt->bind_timeout = (60U * HZ); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b6309db56226..a7a763821b88 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2273,23 +2273,14 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) { + xprt = xprt_alloc(sizeof(*new), slot_table_size); + if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); return ERR_PTR(-ENOMEM); } - xprt = &new->xprt; - - xprt->max_reqs = slot_table_size; - xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - dprintk("RPC: xs_setup_xprt: couldn't allocate slot " - "table\n"); - return ERR_PTR(-ENOMEM); - } + new = container_of(xprt, struct sock_xprt, xprt); memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) From e204e621b4160c802315bc2d0fa335337c0d62e8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:03:13 +0400 Subject: [PATCH 32/99] sunrpc: Factor out rpc_xprt freeing Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 7 +++++++ net/sunrpc/xprtrdma/transport.c | 7 ++----- net/sunrpc/xprtsock.c | 12 ++++-------- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 00f6e3fe2900..af4b560f0794 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -281,6 +281,7 @@ void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); struct rpc_xprt * xprt_alloc(int size, int max_req); +void xprt_free(struct rpc_xprt *); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 26cbe219388b..0637340e5342 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -984,6 +984,13 @@ struct rpc_xprt *xprt_alloc(int size, int max_req) } EXPORT_SYMBOL_GPL(xprt_alloc); +void xprt_free(struct rpc_xprt *xprt) +{ + kfree(xprt->slot); + kfree(xprt); +} +EXPORT_SYMBOL_GPL(xprt_free); + /** * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9d77bf25829f..0f7a1b9d05ad 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -251,9 +251,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) xprt_rdma_free_addresses(xprt); - kfree(xprt->slot); - xprt->slot = NULL; - kfree(xprt); + xprt_free(xprt); dprintk("RPC: %s: returning\n", __func__); @@ -401,8 +399,7 @@ xprt_setup_rdma(struct xprt_create *args) out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ERR_PTR(rc); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a7a763821b88..b1e36ec6fd80 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt) 
xs_close(xprt); xs_free_peer_addresses(xprt); - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); module_put(THIS_MODULE); } @@ -2362,8 +2361,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } @@ -2438,8 +2436,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } @@ -2519,8 +2516,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } From fc5d00b04a3a58cac8620403dfe9f43f72578ec1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:03:50 +0400 Subject: [PATCH 33/99] sunrpc: Add net argument to svc_create_xprt Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 +- fs/nfs/callback.c | 4 ++-- fs/nfsd/nfsctl.c | 4 ++-- fs/nfsd/nfssvc.c | 5 +++-- include/linux/sunrpc/svc_xprt.h | 4 ++-- net/sunrpc/svc_xprt.c | 4 ++-- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f1bacf1a0391..b13aabc12298 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, xprt = svc_find_xprt(serv, name, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, family, port, + return svc_create_xprt(serv, name, &init_net, family, port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index e17b49e2eabd..aeec017fe814 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv) { int ret; - ret = svc_create_xprt(serv, "tcp", PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, 
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b6e192d25633..b81da24b768c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1015,12 +1015,12 @@ static ssize_t __write_ports_addxprt(char *buf) if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e2c43464f237..2bae1d86f5f2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "nfsd.h" #include "cache.h" #include "vfs.h" @@ -186,12 +187,12 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, + error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, + error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e50e3eca1c7c..646263cf815d 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -74,8 +74,8 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void 
svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, const int, - const unsigned short, int); +int svc_create_xprt(struct svc_serv *, const char *, struct net *, + const int, const unsigned short, int); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f7e8915051b1..d80789a37d88 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -204,8 +204,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, } int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, - const int family, const unsigned short port, - int flags) + struct net *net, const int family, + const unsigned short port, int flags) { struct svc_xprt_class *xcl; From 62832c039eab9d03cd28a66427ce8276988f28b0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:04:18 +0400 Subject: [PATCH 34/99] sunrpc: Pull net argument downto svc_create_socket After this the socket creation in it knows the context. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 5 +++-- net/sunrpc/svcsock.c | 10 +++++++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 ++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 646263cf815d..bb182979569e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -12,6 +12,7 @@ struct svc_xprt_ops { struct svc_xprt *(*xpo_create)(struct svc_serv *, + struct net *net, struct sockaddr *, int, int); struct svc_xprt *(*xpo_accept)(struct svc_xprt *); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d80789a37d88..678b6ee4da7b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -166,6 +166,7 @@ EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, + struct net *net, const int family, const unsigned short port, int flags) @@ -200,7 +201,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return ERR_PTR(-EAFNOSUPPORT); } - return xcl->xcl_ops->xpo_create(serv, sap, len, flags); + return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); } int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, @@ -221,7 +222,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, family, port, flags); + newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7e534dd09077..559338527f47 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *); static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, - struct sockaddr *, int, int); + 
struct net *, struct sockaddr *, + int, int); #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; static struct lock_class_key svc_slock_key[2]; @@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) } static struct svc_xprt *svc_udp_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { - return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); + return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags); } static struct svc_xprt_ops svc_udp_ops = { @@ -1178,10 +1180,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { - return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); + return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } static struct svc_xprt_ops svc_tcp_ops = { @@ -1385,6 +1388,7 @@ EXPORT_SYMBOL_GPL(svc_addsock); */ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, int protocol, + struct net *net, struct sockaddr *sin, int len, int flags) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index edea15a54e51..950a206600c0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -52,6 +52,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); @@ -670,6 +671,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, * Create a listening RDMA service endpoint. 
*/ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { From c653ce3f0aee9bb2b221ebf3579385c06f81efcd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:04:45 +0400 Subject: [PATCH 35/99] sunrpc: Add net to rpc_create_args Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 1 + fs/lockd/mon.c | 1 + fs/nfs/client.c | 1 + fs/nfs/mount_clnt.c | 2 ++ fs/nfsd/nfs4callback.c | 1 + include/linux/sunrpc/clnt.h | 1 + net/sunrpc/rpcb_clnt.c | 2 ++ 7 files changed, 9 insertions(+) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index bb464d12104c..25e21e4023b2 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host) .to_retries = 5U, }; struct rpc_create_args args = { + .net = &init_net, .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e3015464fbab..e0c918949644 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void) .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; struct rpc_create_args args = { + .net = &init_net, .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e7340729af89..351b71187b38 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -601,6 +601,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { + .net = &init_net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 59047f8d7d72..4b472038342b 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { + .net = 
&init_net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { + .net = &init_net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 014482c4e57d..1112f451295a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -479,6 +479,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) .to_retries = 0, }; struct rpc_create_args args = { + .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *) &cb->cb_addr, .addrsize = cb->cb_addrlen, diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 85f38a63f098..58c4473f899a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -102,6 +102,7 @@ struct rpc_procinfo { #ifdef __KERNEL__ struct rpc_create_args { + struct net *net; int protocol; struct sockaddr *address; size_t addrsize; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index dac219a56ae1..83af38df3267 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { + .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), @@ -228,6 +229,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version) { struct rpc_create_args args = { + .net = &init_net, .protocol = proto, .address = srvaddr, .addrsize = salen, From 9a23e332ec621d36e52cc7a978abc0917067b1aa Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:05:12 +0400 Subject: [PATCH 36/99] sunrpc: Add net to xprt_create Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index af4b560f0794..c4f931597d0e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -249,6 +249,7 @@ static inline int bc_prealloc(struct rpc_rqst *req) struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ + struct net * net; struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fa5549079d79..f4bbd830a4f3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) struct rpc_xprt *xprt; struct rpc_clnt *clnt; struct xprt_create xprtargs = { + .net = args->net, .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, From 37aa2133731d9231eb834f700119f0d3f1ed2664 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:05:43 +0400 Subject: [PATCH 37/99] sunrpc: Tag rpc_xprt with net The net is known from the xprt_create and this tagging will also give us the context in the connection workers where real sockets are created. Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/xprt.c | 4 +++- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtsock.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c4f931597d0e..89d10d279a20 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -224,6 +224,7 @@ struct rpc_xprt { bklog_u; /* backlog queue utilization */ } stat; + struct net *xprt_net; const char *address_strings[RPC_DISPLAY_MAX]; }; @@ -281,7 +282,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); -struct rpc_xprt * xprt_alloc(int size, int max_req); +struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); void xprt_free(struct rpc_xprt *); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0637340e5342..953206d8c6c2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -962,7 +962,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } -struct rpc_xprt *xprt_alloc(int size, int max_req) +struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) { struct rpc_xprt *xprt; @@ -975,6 +975,7 @@ struct rpc_xprt *xprt_alloc(int size, int max_req) if (xprt->slot == NULL) goto out_free; + xprt->xprt_net = get_net(net); return xprt; out_free: @@ -986,6 +987,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { + put_net(xprt->xprt_net); kfree(xprt->slot); kfree(xprt); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0f7a1b9d05ad..2da32b40bfcf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -283,7 +283,7 @@ xprt_setup_rdma(struct xprt_create *args) return 
ERR_PTR(-EBADF); } - xprt = xprt_alloc(sizeof(struct rpcrdma_xprt), + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b1e36ec6fd80..4ef3a6a9445c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2272,7 +2272,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - xprt = xprt_alloc(sizeof(*new), slot_table_size); + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); From 721db93a55dad71bb89e7d11cc6be1f180ec3f2d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:06:32 +0400 Subject: [PATCH 38/99] net: Export __sock_create Signed-off-by: Pavel Emelyanov Acked-by: David S. Miller Signed-off-by: J. Bruce Fields --- include/linux/net.h | 2 ++ net/socket.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index dee0b11a8759..16faa130088c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -229,6 +229,8 @@ enum { extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(const struct net_proto_family *fam); extern void sock_unregister(int family); +extern int __sock_create(struct net *net, int family, int type, int proto, + struct socket **res, int kern); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, diff --git a/net/socket.c b/net/socket.c index 2270b941bcc7..0c37b0037b97 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1144,7 +1144,7 @@ int sock_wake_async(struct socket *sock, int how, int band) } EXPORT_SYMBOL(sock_wake_async); -static int __sock_create(struct net *net, int family, int type, int protocol, +int __sock_create(struct 
net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1256,6 +1256,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, rcu_read_unlock(); goto out_sock_release; } +EXPORT_SYMBOL(__sock_create); int sock_create(int family, int type, int protocol, struct socket **res) { From 14ec63c3336af7ea5445e0d8f4d26ba3041e40b3 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Sep 2010 16:06:57 +0400 Subject: [PATCH 39/99] sunrpc: Create sockets in net namespaces The context is already known in all the sock_create callers. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xprtsock.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 559338527f47..88de3d093165 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1425,7 +1425,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, return ERR_PTR(-EINVAL); } - error = sock_create_kern(family, type, protocol, &sock); + error = __sock_create(net, family, type, protocol, &sock, 1); if (error < 0) return ERR_PTR(error); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4ef3a6a9445c..f9964ef35e3e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1680,7 +1680,7 @@ static void xs_udp_connect_worker4(struct work_struct *work) /* Start by resetting any existing state */ xs_reset_transport(transport); - err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + err = __sock_create(xprt->xprt_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; @@ -1725,7 +1725,7 @@ static void xs_udp_connect_worker6(struct work_struct *work) /* Start by resetting any existing state */ xs_reset_transport(transport); - err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + err = 
__sock_create(xprt->xprt_net, PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; @@ -1931,7 +1931,7 @@ static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, int err; /* start from scratch */ - err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + err = __sock_create(xprt->xprt_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock, 1); if (err < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1970,7 +1970,7 @@ static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, int err; /* start from scratch */ - err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); + err = __sock_create(xprt->xprt_net, PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock, 1); if (err < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); From 277f68dbba397997c7f3dc843d14afa1654bb80e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Sep 2010 12:55:06 +1000 Subject: [PATCH 40/99] sunrpc: fix race in new cache_wait code. If we set up to wait for a cache item to be filled in, and then find that it is no longer pending, it could be that some other thread is in 'cache_revisit_request' and has moved our request to its 'pending' list. So when our setup_deferral calls cache_revisit_request it will find nothing to put on the pending list, and do nothing. We then return from cache_wait_req, thus leaving the 'sleeper' on-stack structure open to being corrupted by subsequent stack usage. However that 'sleeper' could still be on the 'pending' list that the other thread is looking at and so any corruption could cause it to behave badly. To avoid this race we simply take the same path as if the 'wait_for_completion_interruptible_timeout' was interrupted and if the sleeper is no longer on the list (which it won't be) we wait on the completion - which will ensure that any other cache_revisit_request will have let go of the sleeper. 
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e20968aac68a..1e72cc955931 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -578,10 +578,9 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item) dreq->revisit = cache_restart_thread; ret = setup_deferral(dreq, item); - if (ret) - return ret; - if (wait_for_completion_interruptible_timeout( + if (ret || + wait_for_completion_interruptible_timeout( &sleeper.completion, req->thread_wait) <= 0) { /* The completion wasn't completed, so we need * to clean up From 1e7af1b8062598a038c04dfaaabd038a0d6e8b6a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 1 Oct 2010 15:40:01 -0400 Subject: [PATCH 41/99] nfsd4: remove spkm3 Unfortunately, spkm3 never got very far; while interoperability with one other implementation was demonstrated at some point, problems were found with the spec that were deemed not worth fixing. The kernel code is useless on its own without nfs-utils patches which were never merged into nfs-utils, and were only ever available from citi.umich.edu. They appear not to have been updated since 2005. Therefore it seems safe to assume that this code has no users, and never will. Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/gss_spkm3.h | 55 ----- net/sunrpc/Kconfig | 19 -- net/sunrpc/auth_gss/Makefile | 5 - net/sunrpc/auth_gss/gss_spkm3_mech.c | 247 ----------------------- net/sunrpc/auth_gss/gss_spkm3_seal.c | 186 ----------------- net/sunrpc/auth_gss/gss_spkm3_token.c | 267 ------------------------- net/sunrpc/auth_gss/gss_spkm3_unseal.c | 127 ------------ 7 files changed, 906 deletions(-) delete mode 100644 include/linux/sunrpc/gss_spkm3.h delete mode 100644 net/sunrpc/auth_gss/gss_spkm3_mech.c delete mode 100644 net/sunrpc/auth_gss/gss_spkm3_seal.c delete mode 100644 net/sunrpc/auth_gss/gss_spkm3_token.c delete mode 100644 net/sunrpc/auth_gss/gss_spkm3_unseal.c diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h deleted file mode 100644 index e3e6a3437f8b..000000000000 --- a/include/linux/sunrpc/gss_spkm3.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_spkm3.h - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. 
- * - * Andy Adamson - */ - -#include -#include -#include - -struct spkm3_ctx { - struct xdr_netobj ctx_id; /* per message context id */ - int endtime; /* endtime of the context */ - struct xdr_netobj mech_used; - unsigned int ret_flags ; - struct xdr_netobj conf_alg; - struct xdr_netobj derived_conf_key; - struct xdr_netobj intg_alg; - struct xdr_netobj derived_integ_key; -}; - -/* OIDs declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */ -extern const struct xdr_netobj hmac_md5_oid; -extern const struct xdr_netobj cast5_cbc_oid; - -/* SPKM InnerContext Token types */ - -#define SPKM_ERROR_TOK 3 -#define SPKM_MIC_TOK 4 -#define SPKM_WRAP_TOK 5 -#define SPKM_DEL_TOK 6 - -u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype); - -u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); - -#define CKSUMTYPE_RSA_MD5 0x0007 -#define CKSUMTYPE_HMAC_MD5 0x0008 - -s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum); -void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); -int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, - int explen); -void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, - unsigned char *ctxhdr, int elen, int zbit); -void spkm3_make_mic_token(unsigned char **tokp, int toklen, - struct xdr_netobj *mic_hdr, - struct xdr_netobj *md5cksum, int md5elen, int md5zbit); -u32 spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, - unsigned char **cksum); diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 3376d7657185..8873fd8ddacd 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5 Kerberos support should be installed. If unsure, say Y. 
- -config RPCSEC_GSS_SPKM3 - tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - select SUNRPC_GSS - select CRYPTO - select CRYPTO_MD5 - select CRYPTO_DES - select CRYPTO_CAST5 - select CRYPTO_CBC - help - Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechanism (RFC 2025). - - Secure RPC calls with SPKM3 require an auxiliary userspace - daemon which may be found in the Linux nfs-utils package - available from http://linux-nfs.org/. - - If unsure, say N. diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 74a231735f67..7350d86a32ee 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o - -obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o - -rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \ - gss_spkm3_token.o diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c deleted file mode 100644 index adade3d313f2..000000000000 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_mech.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - * J. Bruce Fields - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -static const void * -simple_get_bytes(const void *p, const void *end, void *res, int len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) -{ - const void *q; - unsigned int len; - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - res->len = len; - if (len == 0) { - res->data = NULL; - return p; - } - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(res->data == NULL)) - return ERR_PTR(-ENOMEM); - return q; -} - -static int -gss_import_sec_context_spkm3(const void *p, size_t len, - struct gss_ctx 
*ctx_id, - gfp_t gfp_mask) -{ - const void *end = (const void *)((const char *)p + len); - struct spkm3_ctx *ctx; - int version; - - if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask))) - goto out_err; - - p = simple_get_bytes(p, end, &version, sizeof(version)); - if (IS_ERR(p)) - goto out_err_free_ctx; - if (version != 1) { - dprintk("RPC: unknown spkm3 token format: " - "obsolete nfs-utils?\n"); - p = ERR_PTR(-EINVAL); - goto out_err_free_ctx; - } - - p = simple_get_netobj(p, end, &ctx->ctx_id); - if (IS_ERR(p)) - goto out_err_free_ctx; - - p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); - if (IS_ERR(p)) - goto out_err_free_ctx_id; - - p = simple_get_netobj(p, end, &ctx->mech_used); - if (IS_ERR(p)) - goto out_err_free_ctx_id; - - p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); - if (IS_ERR(p)) - goto out_err_free_mech; - - p = simple_get_netobj(p, end, &ctx->conf_alg); - if (IS_ERR(p)) - goto out_err_free_mech; - - p = simple_get_netobj(p, end, &ctx->derived_conf_key); - if (IS_ERR(p)) - goto out_err_free_conf_alg; - - p = simple_get_netobj(p, end, &ctx->intg_alg); - if (IS_ERR(p)) - goto out_err_free_conf_key; - - p = simple_get_netobj(p, end, &ctx->derived_integ_key); - if (IS_ERR(p)) - goto out_err_free_intg_alg; - - if (p != end) { - p = ERR_PTR(-EFAULT); - goto out_err_free_intg_key; - } - - ctx_id->internal_ctx_id = ctx; - - dprintk("RPC: Successfully imported new spkm context.\n"); - return 0; - -out_err_free_intg_key: - kfree(ctx->derived_integ_key.data); -out_err_free_intg_alg: - kfree(ctx->intg_alg.data); -out_err_free_conf_key: - kfree(ctx->derived_conf_key.data); -out_err_free_conf_alg: - kfree(ctx->conf_alg.data); -out_err_free_mech: - kfree(ctx->mech_used.data); -out_err_free_ctx_id: - kfree(ctx->ctx_id.data); -out_err_free_ctx: - kfree(ctx); -out_err: - return PTR_ERR(p); -} - -static void -gss_delete_sec_context_spkm3(void *internal_ctx) -{ - struct spkm3_ctx *sctx = internal_ctx; - - 
kfree(sctx->derived_integ_key.data); - kfree(sctx->intg_alg.data); - kfree(sctx->derived_conf_key.data); - kfree(sctx->conf_alg.data); - kfree(sctx->mech_used.data); - kfree(sctx->ctx_id.data); - kfree(sctx); -} - -static u32 -gss_verify_mic_spkm3(struct gss_ctx *ctx, - struct xdr_buf *signbuf, - struct xdr_netobj *checksum) -{ - u32 maj_stat = 0; - struct spkm3_ctx *sctx = ctx->internal_ctx_id; - - maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); - - dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); - return maj_stat; -} - -static u32 -gss_get_mic_spkm3(struct gss_ctx *ctx, - struct xdr_buf *message_buffer, - struct xdr_netobj *message_token) -{ - u32 err = 0; - struct spkm3_ctx *sctx = ctx->internal_ctx_id; - - err = spkm3_make_token(sctx, message_buffer, - message_token, SPKM_MIC_TOK); - dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err); - return err; -} - -static const struct gss_api_ops gss_spkm3_ops = { - .gss_import_sec_context = gss_import_sec_context_spkm3, - .gss_get_mic = gss_get_mic_spkm3, - .gss_verify_mic = gss_verify_mic_spkm3, - .gss_delete_sec_context = gss_delete_sec_context_spkm3, -}; - -static struct pf_desc gss_spkm3_pfs[] = { - {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, - {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, -}; - -static struct gss_api_mech gss_spkm3_mech = { - .gm_name = "spkm3", - .gm_owner = THIS_MODULE, - .gm_oid = {7, "\053\006\001\005\005\001\003"}, - .gm_ops = &gss_spkm3_ops, - .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), - .gm_pfs = gss_spkm3_pfs, -}; - -static int __init init_spkm3_module(void) -{ - int status; - - status = gss_mech_register(&gss_spkm3_mech); - if (status) - printk("Failed to register spkm3 gss mechanism!\n"); - return status; -} - -static void __exit cleanup_spkm3_module(void) -{ - gss_mech_unregister(&gss_spkm3_mech); -} - -MODULE_LICENSE("GPL"); -module_init(init_spkm3_module); -module_exit(cleanup_spkm3_module); diff --git 
a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c deleted file mode 100644 index 5a3a65a0e2b4..000000000000 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_seal.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"}; -const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"}; - -/* - * spkm3_make_token() - * - * Only SPKM_MIC_TOK with md5 intg-alg is supported - */ - -u32 -spkm3_make_token(struct spkm3_ctx *ctx, - struct xdr_buf * text, struct xdr_netobj * token, - int toktype) -{ - s32 checksum_type; - char tokhdrbuf[25]; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; - int tokenlen = 0; - unsigned char *ptr; - s32 now; - int ctxelen = 0, ctxzbit = 0; - int md5elen = 0, md5zbit = 0; - - now = jiffies; - - if (ctx->ctx_id.len != 16) { - dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", - ctx->ctx_id.len); - goto out_err; - } - - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG " - "algorithm. 
only support hmac-md5 I-ALG.\n"); - goto out_err; - } else - checksum_type = CKSUMTYPE_HMAC_MD5; - - if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported C-ALG " - "algorithm\n"); - goto out_err; - } - - if (toktype == SPKM_MIC_TOK) { - /* Calculate checksum over the mic-header */ - asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); - spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, - ctxelen, ctxzbit); - if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key, - (char *)mic_hdr.data, mic_hdr.len, - text, 0, &md5cksum)) - goto out_err; - - asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); - tokenlen = 10 + ctxelen + 1 + md5elen + 1; - - /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen + 2); - - ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr); - - spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); - } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ - dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK " - "not supported\n"); - goto out_err; - } - - /* XXX need to implement sequence numbers, and ctx->expired */ - - return GSS_S_COMPLETE; -out_err: - token->data = NULL; - token->len = 0; - return GSS_S_FAILURE; -} - -static int -spkm3_checksummer(struct scatterlist *sg, void *data) -{ - struct hash_desc *desc = data; - - return crypto_hash_update(desc, sg, sg->length); -} - -/* checksum the plaintext data and hdrlen bytes of the token header */ -s32 -make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum) -{ - char *cksumname; - struct hash_desc desc; /* XXX add to ctx? 
*/ - struct scatterlist sg[1]; - int err; - - switch (cksumtype) { - case CKSUMTYPE_HMAC_MD5: - cksumname = "hmac(md5)"; - break; - default: - dprintk("RPC: spkm3_make_checksum:" - " unsupported checksum %d", cksumtype); - return GSS_S_FAILURE; - } - - if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE; - - desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) - return GSS_S_FAILURE; - cksum->len = crypto_hash_digestsize(desc.tfm); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - err = crypto_hash_setkey(desc.tfm, key->data, key->len); - if (err) - goto out; - - err = crypto_hash_init(&desc); - if (err) - goto out; - - sg_init_one(sg, header, hdrlen); - crypto_hash_update(&desc, sg, sg->length); - - xdr_process_buf(body, body_offset, body->len - body_offset, - spkm3_checksummer, &desc); - crypto_hash_final(&desc, cksum->data); - -out: - crypto_free_hash(desc.tfm); - - return err ? GSS_S_FAILURE : 0; -} diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c deleted file mode 100644 index a99825d7caa0..000000000000 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_token.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include -#include -#include -#include -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -/* - * asn1_bitstring_len() - * - * calculate the asn1 bitstring length of the xdr_netobject - */ -void -asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) -{ - int i, zbit = 0,elen = in->len; - char *ptr; - - ptr = &in->data[in->len -1]; - - /* count trailing 0's */ - for(i = in->len; i > 0; i--) { - if (*ptr == 0) { - ptr--; - elen--; - } else - break; - } - - /* count number of 0 bits in final octet */ - ptr = &in->data[elen - 1]; - for(i = 0; i < 8; i++) { - short mask = 0x01; - - if (!((mask << i) & *ptr)) - zbit++; - else - break; - } - *enclen = elen; - *zerobits = zbit; -} - -/* - * decode_asn1_bitstring() - * - * decode a bitstring into a buffer of the expected length. 
- * enclen = bit string length - * explen = expected length (define in rfc) - */ -int -decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) -{ - if (!(out->data = kzalloc(explen,GFP_NOFS))) - return 0; - out->len = explen; - memcpy(out->data, in, enclen); - return 1; -} - -/* - * SPKMInnerContextToken choice SPKM_MIC asn1 token layout - * - * contextid is always 16 bytes plain data. max asn1 bitstring len = 17. - * - * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum) - * - * pos value - * ---------- - * [0] a4 SPKM-MIC tag - * [1] ?? innertoken length (max 44) - * - * - * tok_hdr piece of checksum data starts here - * - * the maximum mic-header len = 9 + 17 = 26 - * mic-header - * ---------- - * [2] 30 SEQUENCE tag - * [3] ?? mic-header length: (max 23) = TokenID + ContextID - * - * TokenID - all fields constant and can be hardcoded - * ------- - * [4] 02 Type 2 - * [5] 02 Length 2 - * [6][7] 01 01 TokenID (SPKM_MIC_TOK) - * - * ContextID - encoded length not constant, calculated - * --------- - * [8] 03 Type 3 - * [9] ?? encoded length - * [10] ?? ctxzbit - * [11] contextid - * - * mic_header piece of checksum data ends here. - * - * int-cksum - encoded length not constant, calculated - * --------- - * [??] 03 Type 3 - * [??] ?? encoded length - * [??] ?? md5zbit - * [??] 
int-cksum (NID_md5 = 16) - * - * maximum SPKM-MIC innercontext token length = - * 10 + encoded contextid_size(17 max) + 2 + encoded - * cksum_size (17 maxfor NID_md5) = 46 - */ - -/* - * spkm3_mic_header() - * - * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation - * elen: 16 byte context id asn1 bitstring encoded length - */ -void -spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit) -{ - char *hptr = *hdrbuf; - char *top = *hdrbuf; - - *(u8 *)hptr++ = 0x30; - *(u8 *)hptr++ = elen + 7; /* on the wire header length */ - - /* tokenid */ - *(u8 *)hptr++ = 0x02; - *(u8 *)hptr++ = 0x02; - *(u8 *)hptr++ = 0x01; - *(u8 *)hptr++ = 0x01; - - /* coniextid */ - *(u8 *)hptr++ = 0x03; - *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */ - *(u8 *)hptr++ = zbit; - memcpy(hptr, ctxdata, elen); - hptr += elen; - *hdrlen = hptr - top; -} - -/* - * spkm3_mic_innercontext_token() - * - * *tokp points to the beginning of the SPKM_MIC token described - * in rfc 2025, section 3.2.1: - * - * toklen is the inner token length - */ -void -spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) -{ - unsigned char *ict = *tokp; - - *(u8 *)ict++ = 0xa4; - *(u8 *)ict++ = toklen; - memcpy(ict, mic_hdr->data, mic_hdr->len); - ict += mic_hdr->len; - - *(u8 *)ict++ = 0x03; - *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */ - *(u8 *)ict++ = md5zbit; - memcpy(ict, md5cksum->data, md5elen); -} - -u32 -spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum) -{ - struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL}; - unsigned char *ptr = *tokp; - int ctxelen; - u32 ret = GSS_S_DEFECTIVE_TOKEN; - - /* spkm3 innercontext token preamble */ - if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) { - dprintk("RPC: BAD SPKM ictoken preamble\n"); - goto out; - } - - *mic_hdrlen = ptr[3]; - - /* token type */ - if ((ptr[4] != 0x02) || 
(ptr[5] != 0x02)) { - dprintk("RPC: BAD asn1 SPKM3 token type\n"); - goto out; - } - - /* only support SPKM_MIC_TOK */ - if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { - dprintk("RPC: ERROR unsupported SPKM3 token\n"); - goto out; - } - - /* contextid */ - if (ptr[8] != 0x03) { - dprintk("RPC: BAD SPKM3 asn1 context-id type\n"); - goto out; - } - - ctxelen = ptr[9]; - if (ctxelen > 17) { /* length includes asn1 zbit octet */ - dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen); - goto out; - } - - /* ignore ptr[10] */ - - if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16)) - goto out; - - /* - * in the current implementation: the optional int-alg is not present - * so the default int-alg (md5) is used the optional snd-seq field is - * also not present - */ - - if (*mic_hdrlen != 6 + ctxelen) { - dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only " - "support default int-alg (should be absent) " - "and do not support snd-seq\n", *mic_hdrlen); - goto out; - } - /* checksum */ - *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */ - - ret = GSS_S_COMPLETE; -out: - kfree(spkm3_ctx_id.data); - return ret; -} - diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c deleted file mode 100644 index cc21ee860bb6..000000000000 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_unseal.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#include -#include -#include -#include -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -/* - * spkm3_read_token() - * - * only SPKM_MIC_TOK with md5 intg-alg is supported - */ -u32 -spkm3_read_token(struct spkm3_ctx *ctx, - struct xdr_netobj *read_token, /* checksum */ - struct xdr_buf *message_buffer, /* signbuf */ - int toktype) -{ - s32 checksum_type; - s32 code; - struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - unsigned char *ptr = (unsigned char *)read_token->data; - unsigned char *cksum; - int bodysize, md5elen; - int mic_hdrlen; - u32 ret = GSS_S_DEFECTIVE_TOKEN; - - if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, - &bodysize, &ptr, read_token->len)) - goto out; - - /* decode the token */ - - if (toktype != SPKM_MIC_TOK) { - dprintk("RPC: BAD SPKM3 token type: %d\n", toktype); - goto out; - } - - if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) - goto out; - - if (*cksum++ != 0x03) { - dprintk("RPC: spkm3_read_token BAD checksum type\n"); - goto out; - } - md5elen = *cksum++; - cksum++; /* move past the zbit */ - - if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) - goto out; - - /* HARD CODED FOR MD5 */ - - /* compute the checksum of the message. 
- * ptr + 2 = start of header piece of checksum - * mic_hdrlen + 2 = length of header piece of checksum - */ - ret = GSS_S_DEFECTIVE_TOKEN; - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG " - "algorithm\n"); - goto out; - } - - checksum_type = CKSUMTYPE_HMAC_MD5; - - code = make_spkm3_checksum(checksum_type, - &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2, - message_buffer, 0, &md5cksum); - - if (code) - goto out; - - ret = GSS_S_BAD_SIG; - code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); - if (code) { - dprintk("RPC: bad MIC checksum\n"); - goto out; - } - - - /* XXX: need to add expiration and sequencing */ - ret = GSS_S_COMPLETE; -out: - kfree(wire_cksum.data); - return ret; -} From 07263f1efe7d5b96e6713471abfa087f41bb2b7c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 May 2010 19:09:40 -0400 Subject: [PATCH 42/99] nfsd4: minor variable renaming (cb -> conn) Now that we have both nfsd4_callback and nfsd4_cb_conn structures, I get confused if variables of both types are always named cb.... Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 16 ++++++++-------- fs/nfsd/nfs4state.c | 28 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1112f451295a..4566b69128a3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -472,7 +472,7 @@ static int max_cb_time(void) /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cl_cb_set an atomic? 
*/ -int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) +int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -481,11 +481,11 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, - .address = (struct sockaddr *) &cb->cb_addr, - .addrsize = cb->cb_addrlen, + .address = (struct sockaddr *) &conn->cb_addr, + .addrsize = conn->cb_addrlen, .timeout = &timeparms, .program = &cb_program, - .prognumber = cb->cb_prog, + .prognumber = conn->cb_prog, .version = 0, .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), @@ -495,8 +495,8 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - if (cb->cb_minorversion) { - args.bc_xprt = cb->cb_xprt; + if (conn->cb_minorversion) { + args.bc_xprt = conn->cb_xprt; args.protocol = XPRT_TRANSPORT_BC_TCP; } /* Create RPC client */ @@ -563,13 +563,13 @@ void do_probe_callback(struct nfs4_client *clp) /* * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 
*/ -void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) +void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { int status; BUG_ON(atomic_read(&clp->cl_cb_set)); - status = setup_callback_client(clp, cb); + status = setup_callback_client(clp, conn); if (status) { warn_no_callback_path(clp, status); return; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cf0d2ffb3c84..d347180ce55a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -207,7 +207,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; + struct nfs4_cb_conn *conn = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); /* @@ -234,7 +234,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f nfs4_file_get_access(fp, O_RDONLY); dp->dl_flock = NULL; dp->dl_type = type; - dp->dl_ident = cb->cb_ident; + dp->dl_ident = conn->cb_ident; dp->dl_stateid.si_boot = boot_time; dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; @@ -1098,7 +1098,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { - struct nfs4_cb_conn *cb = &clp->cl_cb_conn; + struct nfs4_cb_conn *conn = &clp->cl_cb_conn; unsigned short expected_family; /* Currently, we only support tcp and tcp6 for the callback channel */ @@ -1111,24 +1111,24 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) else goto out_err; - cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, se->se_callback_addr_len, - (struct sockaddr *) &cb->cb_addr, - sizeof(cb->cb_addr)); + (struct sockaddr *)&conn->cb_addr, + 
sizeof(conn->cb_addr)); - if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) + if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family) goto out_err; - if (cb->cb_addr.ss_family == AF_INET6) - ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + if (conn->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; - cb->cb_minorversion = 0; - cb->cb_prog = se->se_callback_prog; - cb->cb_ident = se->se_callback_ident; + conn->cb_minorversion = 0; + conn->cb_prog = se->se_callback_prog; + conn->cb_ident = se->se_callback_ident; return; out_err: - cb->cb_addr.ss_family = AF_UNSPEC; - cb->cb_addrlen = 0; + conn->cb_addr.ss_family = AF_UNSPEC; + conn->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); From 586f36735e1d38c32bbfbb2716461e7178724b15 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 May 2010 17:40:53 -0400 Subject: [PATCH 43/99] nfsd4: combine nfs4_rpc_args and nfsd4_cb_sequence These two structs don't really need to be distinct as far as I can tell. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 30 +++++++++++++++--------------- fs/nfsd/state.h | 11 +++-------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4566b69128a3..5687fce85641 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -247,7 +247,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, } static void -encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, +encode_cb_sequence(struct xdr_stream *xdr, struct nfs4_rpc_args *args, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; @@ -258,8 +258,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); WRITE32(OP_CB_SEQUENCE); - WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(args->cbs_clp->cl_cb_seq_nr); + WRITEMEM(args->args_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(args->args_clp->cl_cb_seq_nr); WRITE32(0); /* slotid, always 0 */ WRITE32(0); /* highest slotid always 0 */ WRITE32(0); /* cachethis always 0 */ @@ -285,12 +285,12 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args = rpc_args->args_op; struct nfs4_cb_compound_hdr hdr = { .ident = args->dl_ident, - .minorversion = rpc_args->args_seq.cbs_minorversion, + .minorversion = rpc_args->args_minorversion, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); - encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); + encode_cb_sequence(&xdr, rpc_args, &hdr); encode_cb_recall(&xdr, args, &hdr); encode_cb_nops(&hdr); return 0; @@ -338,7 +338,7 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) * with a single slot. 
*/ static int -decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, +decode_cb_sequence(struct xdr_stream *xdr, struct nfs4_rpc_args *res, struct rpc_rqst *rqstp) { struct nfs4_sessionid id; @@ -346,7 +346,7 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, u32 dummy; __be32 *p; - if (res->cbs_minorversion == 0) + if (res->args_minorversion == 0) return 0; status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); @@ -362,13 +362,13 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, + if (memcmp(id.data, res->args_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out; } READ32(dummy); - if (dummy != res->cbs_clp->cl_cb_seq_nr) { + if (dummy != res->args_clp->cl_cb_seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out; } @@ -392,7 +392,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, - struct nfsd4_cb_sequence *seq) + struct nfs4_rpc_args *args) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; @@ -402,8 +402,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, status = decode_cb_compound_hdr(&xdr, &hdr); if (status) goto out; - if (seq) { - status = decode_cb_sequence(&xdr, seq, rqstp); + if (args) { + status = decode_cb_sequence(&xdr, args, rqstp); if (status) goto out; } @@ -603,8 +603,8 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, * We'll need the clp during XDR encoding and decoding, * and the sequence during decoding to verify the reply */ - args->args_seq.cbs_clp = clp; - task->tk_msg.rpc_resp = &args->args_seq; + args->args_clp = clp; + task->tk_msg.rpc_resp = args; out: dprintk("%s status=%d\n", __func__, status); @@ -623,7 +623,7 @@ 
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) u32 minorversion = clp->cl_cb_conn.cb_minorversion; int status = 0; - args->args_seq.cbs_minorversion = minorversion; + args->args_minorversion = minorversion; if (minorversion) { status = nfsd41_cb_setup_sequence(clp, task); if (status) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 322518c88e4b..59313f1d8e67 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,15 +64,10 @@ typedef struct { (s)->si_fileid, \ (s)->si_generation -struct nfsd4_cb_sequence { - /* args/res */ - u32 cbs_minorversion; - struct nfs4_client *cbs_clp; -}; - struct nfs4_rpc_args { - void *args_op; - struct nfsd4_cb_sequence args_seq; + void *args_op; + struct nfs4_client *args_clp; + u32 args_minorversion; }; struct nfsd4_callback { From 1c8556026edac60368ceef446f0febc08014ba78 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 May 2010 17:46:00 -0400 Subject: [PATCH 44/99] nfsd4: rename nfs4_rpc_args->nfsd4_cb_args With apologies for the gratuitous rename, the new name seems more helpful to me. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 14 +++++++------- fs/nfsd/state.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5687fce85641..5508e928fd9f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -247,7 +247,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, } static void -encode_cb_sequence(struct xdr_stream *xdr, struct nfs4_rpc_args *args, +encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *args, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; @@ -279,7 +279,7 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, - struct nfs4_rpc_args *rpc_args) + struct nfsd4_cb_args *rpc_args) { struct xdr_stream xdr; struct nfs4_delegation *args = rpc_args->args_op; @@ -338,7 +338,7 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) * with a single slot. */ static int -decode_cb_sequence(struct xdr_stream *xdr, struct nfs4_rpc_args *res, +decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *res, struct rpc_rqst *rqstp) { struct nfs4_sessionid id; @@ -392,7 +392,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, - struct nfs4_rpc_args *args) + struct nfsd4_cb_args *args) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; @@ -585,7 +585,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, struct rpc_task *task) { - struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; + struct nfsd4_cb_args *args = task->tk_msg.rpc_argp; u32 *ptr = (u32 *)clp->cl_sessionid.data; int status = 0; @@ -619,7 +619,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfs4_delegation *dp = calldata; struct nfs4_client *clp = dp->dl_client; - struct nfs4_rpc_args *args = 
task->tk_msg.rpc_argp; + struct nfsd4_cb_args *args = task->tk_msg.rpc_argp; u32 minorversion = clp->cl_cb_conn.cb_minorversion; int status = 0; @@ -756,7 +756,7 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *clnt = clp->cl_cb_client; - struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; + struct nfsd4_cb_args *args = &dp->dl_recall.cb_args; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_cred = callback_cred diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 59313f1d8e67..f988b90ec213 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,14 +64,14 @@ typedef struct { (s)->si_fileid, \ (s)->si_generation -struct nfs4_rpc_args { +struct nfsd4_cb_args { void *args_op; struct nfs4_client *args_clp; u32 args_minorversion; }; struct nfsd4_callback { - struct nfs4_rpc_args cb_args; + struct nfsd4_cb_args cb_args; struct work_struct cb_work; }; From 5878453dbde627a8e1b5a4693087e36cb88d45b1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 16 May 2010 16:47:08 -0400 Subject: [PATCH 45/99] nfsd4: generic callback code Make the recall callback code more generic, so that other callbacks will be able to use it too. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 72 +++++++++++++++++++----------------------- fs/nfsd/state.h | 2 ++ 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5508e928fd9f..a037f26252ee 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -585,7 +585,6 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, struct rpc_task *task) { - struct nfsd4_cb_args *args = task->tk_msg.rpc_argp; u32 *ptr = (u32 *)clp->cl_sessionid.data; int status = 0; @@ -598,14 +597,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, status = -EAGAIN; goto out; } - - /* - * We'll need the clp during XDR encoding and decoding, - * and the sequence during decoding to verify the reply - */ - args->args_clp = clp; - task->tk_msg.rpc_resp = args; - out: dprintk("%s status=%d\n", __func__, status); return status; @@ -617,7 +608,8 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, */ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; struct nfsd4_cb_args *args = task->tk_msg.rpc_argp; u32 minorversion = clp->cl_cb_conn.cb_minorversion; @@ -640,7 +632,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; dprintk("%s: minorversion=%d\n", __func__, @@ -662,7 +655,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) static void nfsd4_cb_recall_done(struct rpc_task *task, void 
*calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; @@ -707,7 +701,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) static void nfsd4_cb_recall_release(void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); nfs4_put_delegation(dp); } @@ -749,42 +744,39 @@ void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) rpc_shutdown_client(old); } -/* - * called with dp->dl_count inc'ed. - */ -static void _nfsd4_cb_recall(struct nfs4_delegation *dp) +void nfsd4_release_cb(struct nfsd4_callback *cb) { - struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_cb_client; - struct nfsd4_cb_args *args = &dp->dl_recall.cb_args; - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_cred = callback_cred - }; - - if (clnt == NULL) { - nfs4_put_delegation(dp); - return; /* Client is shutting down; give up. */ - } - - args->args_op = dp; - msg.rpc_argp = args; - dp->dl_retries = 1; - rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); + if (cb->cb_ops->rpc_release) + cb->cb_ops->rpc_release(cb); } void nfsd4_do_callback_rpc(struct work_struct *w) { - /* XXX: for now, just send off delegation recall. */ - /* In future, generalize to handle any sort of callback. 
*/ - struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); - struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall); + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); + struct nfs4_client *clp = cb->cb_args.args_clp; + struct rpc_clnt *clnt = clp->cl_cb_client; - _nfsd4_cb_recall(dp); + if (clnt == NULL) { + nfsd4_release_cb(cb); + return; /* Client is shutting down; give up. */ + } + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT, cb->cb_ops, cb); } - void nfsd4_cb_recall(struct nfs4_delegation *dp) { + struct nfsd4_callback *cb = &dp->dl_recall; + + dp->dl_retries = 1; + cb->cb_args.args_op = dp; + cb->cb_args.args_clp = dp->dl_client; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; + cb->cb_msg.rpc_argp = &cb->cb_args; + cb->cb_msg.rpc_resp = &cb->cb_args; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_recall_ops; + dp->dl_retries = 1; + queue_work(callback_wq, &dp->dl_recall.cb_work); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f988b90ec213..6e592148ad80 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -72,6 +72,8 @@ struct nfsd4_cb_args { struct nfsd4_callback { struct nfsd4_cb_args cb_args; + struct rpc_message cb_msg; + const struct rpc_call_ops *cb_ops; struct work_struct cb_work; }; From cee277d92495a9ea49a6137fe7005d7c76b31b5b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 May 2010 17:52:14 -0400 Subject: [PATCH 46/99] nfsd4: use generic callback code in null case This will eventually allow us, for example, to kick off null callback from contexts where we can't sleep. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 33 ++++++++++++++++++--------------- fs/nfsd/nfs4state.c | 1 + fs/nfsd/state.h | 1 + 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a037f26252ee..26fa878005cc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -519,7 +519,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { - struct nfs4_client *clp = calldata; + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); if (task->tk_status) warn_no_callback_path(clp, task->tk_status); @@ -528,6 +528,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) } static const struct rpc_call_ops nfsd4_cb_probe_ops = { + /* XXX: release method to ensure we set the cb channel down if + * necessary on early failure? */ .rpc_call_done = nfsd4_cb_probe_done, }; @@ -543,21 +545,23 @@ int set_callback_cred(void) return 0; } +static struct workqueue_struct *callback_wq; void do_probe_callback(struct nfs4_client *clp) { - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], - .rpc_argp = clp, - .rpc_cred = callback_cred - }; - int status; + struct nfsd4_callback *cb = &clp->cl_cb_null; - status = rpc_call_async(clp->cl_cb_client, &msg, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN, - &nfsd4_cb_probe_ops, (void *)clp); - if (status) - warn_no_callback_path(clp, status); + cb->cb_args.args_op = NULL; + cb->cb_args.args_clp = clp; + + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; + cb->cb_msg.rpc_argp = NULL; + cb->cb_msg.rpc_resp = NULL; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_probe_ops; + + queue_work(callback_wq, &cb->cb_work); } /* @@ -713,8 +717,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = { .rpc_release = nfsd4_cb_recall_release, }; -static struct workqueue_struct *callback_wq; - 
int nfsd4_create_callback_queue(void) { callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); @@ -760,7 +762,8 @@ void nfsd4_do_callback_rpc(struct work_struct *w) nfsd4_release_cb(cb); return; /* Client is shutting down; give up. */ } - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT, cb->cb_ops, cb); + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + cb->cb_ops, cb); } void nfsd4_cb_recall(struct nfs4_delegation *dp) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d347180ce55a..2f464fb26afc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -978,6 +978,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6e592148ad80..19732d531cda 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -223,6 +223,7 @@ struct nfs4_client { struct nfs4_cb_conn cl_cb_conn; struct rpc_clnt *cl_cb_client; atomic_t cl_cb_set; + struct nfsd4_callback cl_cb_null; /* for nfs41 */ struct list_head cl_sessions; From fb003923263c3f0cb02adbd56a22fe16ef5c0e77 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 May 2010 18:21:37 -0400 Subject: [PATCH 47/99] nfsd4: remove separate cb_args struct I don't see the point of the separate struct. It seems to just be getting in the way. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 45 +++++++++++++++++++++--------------------- fs/nfsd/state.h | 10 +++------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 26fa878005cc..07c3be6eea64 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -247,7 +247,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, } static void -encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *args, +encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; @@ -258,8 +258,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *args, RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); WRITE32(OP_CB_SEQUENCE); - WRITEMEM(args->args_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(args->args_clp->cl_cb_seq_nr); + WRITEMEM(cb->cb_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(cb->cb_clp->cl_cb_seq_nr); WRITE32(0); /* slotid, always 0 */ WRITE32(0); /* highest slotid always 0 */ WRITE32(0); /* cachethis always 0 */ @@ -279,18 +279,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, - struct nfsd4_cb_args *rpc_args) + struct nfsd4_callback *cb) { struct xdr_stream xdr; - struct nfs4_delegation *args = rpc_args->args_op; + struct nfs4_delegation *args = cb->cb_op; struct nfs4_cb_compound_hdr hdr = { .ident = args->dl_ident, - .minorversion = rpc_args->args_minorversion, + .minorversion = cb->cb_minorversion, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); - encode_cb_sequence(&xdr, rpc_args, &hdr); + encode_cb_sequence(&xdr, cb, &hdr); encode_cb_recall(&xdr, args, &hdr); encode_cb_nops(&hdr); return 0; @@ -338,7 +338,7 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) * with a single slot. 
*/ static int -decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *res, +decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct rpc_rqst *rqstp) { struct nfs4_sessionid id; @@ -346,7 +346,7 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *res, u32 dummy; __be32 *p; - if (res->args_minorversion == 0) + if (cb->cb_minorversion == 0) return 0; status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); @@ -362,13 +362,13 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_args *res, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, res->args_clp->cl_sessionid.data, + if (memcmp(id.data, cb->cb_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out; } READ32(dummy); - if (dummy != res->args_clp->cl_cb_seq_nr) { + if (dummy != cb->cb_clp->cl_cb_seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out; } @@ -392,7 +392,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, - struct nfsd4_cb_args *args) + struct nfsd4_callback *cb) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; @@ -402,8 +402,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, status = decode_cb_compound_hdr(&xdr, &hdr); if (status) goto out; - if (args) { - status = decode_cb_sequence(&xdr, args, rqstp); + if (cb) { + status = decode_cb_sequence(&xdr, cb, rqstp); if (status) goto out; } @@ -551,8 +551,8 @@ void do_probe_callback(struct nfs4_client *clp) { struct nfsd4_callback *cb = &clp->cl_cb_null; - cb->cb_args.args_op = NULL; - cb->cb_args.args_clp = clp; + cb->cb_op = NULL; + cb->cb_clp = clp; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; cb->cb_msg.rpc_argp = NULL; @@ -615,11 +615,10 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) struct nfsd4_callback 
*cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; - struct nfsd4_cb_args *args = task->tk_msg.rpc_argp; u32 minorversion = clp->cl_cb_conn.cb_minorversion; int status = 0; - args->args_minorversion = minorversion; + cb->cb_minorversion = minorversion; if (minorversion) { status = nfsd41_cb_setup_sequence(clp, task); if (status) { @@ -755,7 +754,7 @@ void nfsd4_release_cb(struct nfsd4_callback *cb) void nfsd4_do_callback_rpc(struct work_struct *w) { struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); - struct nfs4_client *clp = cb->cb_args.args_clp; + struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt = clp->cl_cb_client; if (clnt == NULL) { @@ -771,11 +770,11 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) struct nfsd4_callback *cb = &dp->dl_recall; dp->dl_retries = 1; - cb->cb_args.args_op = dp; - cb->cb_args.args_clp = dp->dl_client; + cb->cb_op = dp; + cb->cb_clp = dp->dl_client; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; - cb->cb_msg.rpc_argp = &cb->cb_args; - cb->cb_msg.rpc_resp = &cb->cb_args; + cb->cb_msg.rpc_argp = cb; + cb->cb_msg.rpc_resp = cb; cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_recall_ops; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 19732d531cda..2ece6bee65f7 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,14 +64,10 @@ typedef struct { (s)->si_fileid, \ (s)->si_generation -struct nfsd4_cb_args { - void *args_op; - struct nfs4_client *args_clp; - u32 args_minorversion; -}; - struct nfsd4_callback { - struct nfsd4_cb_args cb_args; + void *cb_op; + struct nfs4_client *cb_clp; + u32 cb_minorversion; struct rpc_message cb_msg; const struct rpc_call_ops *cb_ops; struct work_struct cb_work; From 6ff8da088766d70f0441feb982b82978a6cbf7ef Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Fri, 4 Jun 2010 20:04:45 -0400 Subject: [PATCH 48/99] nfsd4: Move callback setup to callback queue Instead of creating the new rpc client from a regular server thread, set a flag, kick off a null call, and allow the null call to do the work of setting up the client on the callback workqueue. Use a spinlock to ensure the callback work gets a consistent view of the callback parameters. This allows, for example, changing the callback from contexts where sleeping is not allowed. I hope it will also keep the locking simple as we add more session and trunking features, by serializing most of the callback-specific work. This also closes a small race where the new cb_ident could be used with an old connection (or vice-versa). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 73 ++++++++++++++++++++++++++++++------------ fs/nfsd/nfs4state.c | 7 ++-- fs/nfsd/state.h | 10 ++++-- 3 files changed, 63 insertions(+), 27 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 07c3be6eea64..a269dbeff150 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -284,7 +284,7 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct xdr_stream xdr; struct nfs4_delegation *args = cb->cb_op; struct nfs4_cb_compound_hdr hdr = { - .ident = args->dl_ident, + .ident = cb->cb_clp->cl_cb_ident, .minorversion = cb->cb_minorversion, }; @@ -506,7 +506,8 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) PTR_ERR(client)); return PTR_ERR(client); } - nfsd4_set_callback_client(clp, client); + clp->cl_cb_ident = conn->cb_ident; + clp->cl_cb_client = client; return 0; } @@ -569,15 +570,12 @@ void do_probe_callback(struct nfs4_client *clp) */ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { - int status; - BUG_ON(atomic_read(&clp->cl_cb_set)); - status = setup_callback_client(clp, conn); - if (status) { - warn_no_callback_path(clp, status); - return; - } + 
spin_lock(&clp->cl_lock); + memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + spin_unlock(&clp->cl_lock); do_probe_callback(clp); } @@ -730,19 +728,16 @@ void nfsd4_destroy_callback_queue(void) } /* must be called under the state lock */ -void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) +void nfsd4_shutdown_callback(struct nfs4_client *clp) { - struct rpc_clnt *old = clp->cl_cb_client; - - clp->cl_cb_client = new; + set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); /* - * After this, any work that saw the old value of cl_cb_client will - * be gone: + * Note this won't actually result in a null callback; + * instead, nfsd4_do_callback_rpc() will detect the killed + * client, destroy the rpc client, and stop: */ + do_probe_callback(clp); flush_workqueue(callback_wq); - /* So we can safely shut it down: */ - if (old) - rpc_shutdown_client(old); } void nfsd4_release_cb(struct nfsd4_callback *cb) @@ -751,15 +746,51 @@ void nfsd4_release_cb(struct nfsd4_callback *cb) cb->cb_ops->rpc_release(cb); } +void nfsd4_process_cb_update(struct nfsd4_callback *cb) +{ + struct nfs4_cb_conn conn; + struct nfs4_client *clp = cb->cb_clp; + int err; + + /* + * This is either an update, or the client dying; in either case, + * kill the old client: + */ + if (clp->cl_cb_client) { + rpc_shutdown_client(clp->cl_cb_client); + clp->cl_cb_client = NULL; + } + if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) + return; + spin_lock(&clp->cl_lock); + /* + * Only serialized callback code is allowed to clear these + * flags; main nfsd code can only set them: + */ + BUG_ON(!clp->cl_cb_flags); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); + spin_unlock(&clp->cl_lock); + + err = setup_callback_client(clp, &conn); + if (err) + warn_no_callback_path(clp, err); +} + void nfsd4_do_callback_rpc(struct work_struct *w) { struct 
nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; - struct rpc_clnt *clnt = clp->cl_cb_client; + struct rpc_clnt *clnt; - if (clnt == NULL) { + if (clp->cl_cb_flags) + nfsd4_process_cb_update(cb); + + clnt = clp->cl_cb_client; + if (!clnt) { + /* Callback channel broken, or client killed; give up: */ nfsd4_release_cb(cb); - return; /* Client is shutting down; give up. */ + return; } rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops, cb); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2f464fb26afc..d3f12dcc1696 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_cb_conn *conn = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); /* @@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f nfs4_file_get_access(fp, O_RDONLY); dp->dl_flock = NULL; dp->dl_type = type; - dp->dl_ident = conn->cb_ident; dp->dl_stateid.si_boot = boot_time; dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; @@ -875,7 +873,7 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } - nfsd4_set_callback_client(clp, NULL); + nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); @@ -978,6 +976,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); + spin_lock_init(&clp->cl_lock); INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); @@ -1547,7 
+1546,7 @@ nfsd4_destroy_session(struct svc_rqst *r, nfs4_lock_state(); /* wait for callbacks */ - nfsd4_set_callback_client(ses->se_client, NULL); + nfsd4_shutdown_callback(ses->se_client); nfs4_unlock_state(); nfsd4_put_session(ses); status = nfs_ok; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2ece6bee65f7..58bc2a63ca14 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -84,7 +84,6 @@ struct nfs4_delegation { u32 dl_type; time_t dl_time; /* For recall: */ - u32 dl_ident; stateid_t dl_stateid; struct knfsd_fh dl_fh; int dl_retries; @@ -217,10 +216,17 @@ struct nfs4_client { /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; +#define NFSD4_CLIENT_CB_UPDATE 1 +#define NFSD4_CLIENT_KILL 2 + unsigned long cl_cb_flags; struct rpc_clnt *cl_cb_client; + u32 cl_cb_ident; atomic_t cl_cb_set; struct nfsd4_callback cl_cb_null; + /* for all client information that callback code might need: */ + spinlock_t cl_lock; + /* for nfs41 */ struct list_head cl_sessions; struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ @@ -439,7 +445,7 @@ extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); -extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); +extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); extern void nfsd4_init_recdir(char *recdir_name); From c23753dac1d21b39facd2ad3c7340dd275b3022f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 27 Sep 2010 16:22:30 -0400 Subject: [PATCH 49/99] nfsd4: fix alloc_init_session BUILD_BUG_ON() Note we're allocating an array of nfsd4_slot *'s, not nfsd4_slot's. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d3f12dcc1696..e30579432863 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -657,7 +657,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, if (status) goto out; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) + sizeof(struct nfsd4_session) > PAGE_SIZE); status = nfserr_jukebox; From dd93842457174b847b023314e5a501e5ed45caeb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 27 Sep 2010 16:26:25 -0400 Subject: [PATCH 50/99] nfsd4: fix alloc_init_session return type This returns an nfs error, not -ERRNO. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e30579432863..ebddcc173ed8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -639,9 +639,7 @@ static inline int slot_bytes(struct nfsd4_channel_attrs *ca) return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; } -static int -alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, - struct nfsd4_create_session *cses) +static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new, tmp; struct nfsd4_slot *sp; From 5b6feee9608dce7afd2646f457c93e612526d1d8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 27 Sep 2010 17:12:05 -0400 Subject: [PATCH 51/99] nfsd4: clean up session allocation Changes: - make sure session memory reservation is released on failure path. - use min_t()/min() for more compact code in several places. - break alloc_init_session into smaller pieces. - miscellaneous other cleanup. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 211 +++++++++++++++++++------------------------- 1 file changed, 89 insertions(+), 122 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ebddcc173ed8..f86476c23b2f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -533,94 +533,6 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) -/* - * Give the client the number of ca_maxresponsesize_cached slots it - * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, - * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more - * than NFSD_MAX_SLOTS_PER_SESSION. - * - * If we run out of reserved DRC memory we should (up to a point) - * re-negotiate active sessions and reduce their slot usage to make - * rooom for new connections. For now we just fail the create session. - */ -static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) -{ - int mem, size = fchan->maxresp_cached; - - if (fchan->maxreqs < 1) - return nfserr_inval; - - if (size < NFSD_MIN_HDR_SEQ_SZ) - size = NFSD_MIN_HDR_SEQ_SZ; - size -= NFSD_MIN_HDR_SEQ_SZ; - if (size > NFSD_SLOT_CACHE_SIZE) - size = NFSD_SLOT_CACHE_SIZE; - - /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ - mem = fchan->maxreqs * size; - if (mem > NFSD_MAX_MEM_PER_SESSION) { - fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; - if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - mem = fchan->maxreqs * size; - } - - spin_lock(&nfsd_drc_lock); - /* bound the total session drc memory ussage */ - if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { - fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; - mem = fchan->maxreqs * size; - } - nfsd_drc_mem_used += mem; - spin_unlock(&nfsd_drc_lock); - - if (fchan->maxreqs == 0) - return nfserr_jukebox; - - fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; - return 0; -} - -/* - * fchan holds the client values on input, and the server values on output - * 
sv_max_mesg is the maximum payload plus one page for overhead. - */ -static int init_forechannel_attrs(struct svc_rqst *rqstp, - struct nfsd4_channel_attrs *session_fchan, - struct nfsd4_channel_attrs *fchan) -{ - int status = 0; - __u32 maxcount = nfsd_serv->sv_max_mesg; - - /* headerpadsz set to zero in encode routine */ - - /* Use the client's max request and max response size if possible */ - if (fchan->maxreq_sz > maxcount) - fchan->maxreq_sz = maxcount; - session_fchan->maxreq_sz = fchan->maxreq_sz; - - if (fchan->maxresp_sz > maxcount) - fchan->maxresp_sz = maxcount; - session_fchan->maxresp_sz = fchan->maxresp_sz; - - /* Use the client's maxops if possible */ - if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) - fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; - session_fchan->maxops = fchan->maxops; - - /* FIXME: Error means no more DRC pages so the server should - * recover pages from existing sessions. For now fail session - * creation. - */ - status = set_forechannel_drc_size(fchan); - - session_fchan->maxresp_cached = fchan->maxresp_cached; - session_fchan->maxreqs = fchan->maxreqs; - - dprintk("%s status %d\n", __func__, status); - return status; -} - static void free_session_slots(struct nfsd4_session *ses) { @@ -639,63 +551,118 @@ static inline int slot_bytes(struct nfsd4_channel_attrs *ca) return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; } -static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +static int nfsd4_sanitize_slot_size(u32 size) { - struct nfsd4_session *new, tmp; - struct nfsd4_slot *sp; - int idx, slotsize, cachesize, i; - int status; + size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ + size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); - memset(&tmp, 0, sizeof(tmp)); + return size; +} - /* FIXME: For now, we just accept the client back channel attributes. 
*/ - tmp.se_bchannel = cses->back_channel; - status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, - &cses->fore_channel); - if (status) - goto out; +/* + * XXX: If we run out of reserved DRC memory we could (up to a point) + * re-negotiate active sessions and reduce their slot usage to make + * rooom for new connections. For now we just fail the create session. + */ +static int nfsd4_get_drc_mem(int slotsize, u32 num) +{ + int avail; + + num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); + + spin_lock(&nfsd_drc_lock); + avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); + num = min_t(int, num, avail / slotsize); + nfsd_drc_mem_used += num * slotsize; + spin_unlock(&nfsd_drc_lock); + + return num; +} + +static void nfsd4_put_drc_mem(int slotsize, int num) +{ + spin_lock(&nfsd_drc_lock); + nfsd_drc_mem_used -= slotsize * num; + spin_unlock(&nfsd_drc_lock); +} + +static struct nfsd4_session *alloc_session(int slotsize, int numslots) +{ + struct nfsd4_session *new; + int mem, i; BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); + + sizeof(struct nfsd4_session) > PAGE_SIZE); + mem = numslots * sizeof(struct nfsd4_slot *); - status = nfserr_jukebox; - /* allocate struct nfsd4_session and slot table pointers in one piece */ - slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); - new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); + new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); if (!new) - goto out; - - memcpy(new, &tmp, sizeof(*new)); - + return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ - cachesize = slot_bytes(&new->se_fchannel); - for (i = 0; i < new->se_fchannel.maxreqs; i++) { - sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); - if (!sp) + for (i = 0; i < numslots; i++) { + mem = sizeof(struct nfsd4_slot) + slotsize; + new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + if (!new->se_slots[i]) goto out_free; - 
new->se_slots[i] = sp; } + return new; +out_free: + while (i--) + kfree(new->se_slots[i]); + kfree(new); + return NULL; +} + +static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) +{ + u32 maxrpc = nfsd_serv->sv_max_mesg; + + new->maxreqs = numslots; + new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; + new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); + new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); + new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); +} + +static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +{ + struct nfsd4_session *new; + struct nfsd4_channel_attrs *fchan = &cses->fore_channel; + int numslots, slotsize; + int idx; + + /* + * Note decreasing slot size below client's request may + * make it difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. 
+ */ + slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); + numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); + + new = alloc_session(slotsize, numslots); + if (!new) { + nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + return nfserr_jukebox; + } + init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); new->se_client = clp; gen_sessionid(new); - idx = hash_sessionid(&new->se_sessionid); memcpy(clp->cl_sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); new->se_flags = cses->flags; kref_init(&new->se_ref); + idx = hash_sessionid(&new->se_sessionid); spin_lock(&client_lock); list_add(&new->se_hash, &sessionid_hashtbl[idx]); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&client_lock); - status = nfs_ok; -out: - return status; -out_free: - free_session_slots(new); - kfree(new); - goto out; + return nfs_ok; } /* caller must hold client_lock */ From c7662518c781edc8059cd9737d18168154bf7510 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 6 Jun 2010 18:12:14 -0400 Subject: [PATCH 52/99] nfsd4: keep per-session list of connections The spec requires us in various places to keep track of the connections associated with each session. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 69 ++++++++++++++++++++++++++++++++++---------- fs/nfsd/state.h | 8 +++++ include/linux/nfs4.h | 3 ++ 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f86476c23b2f..c7c1a7afa197 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -625,11 +625,58 @@ static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); } +static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_conn *conn; + + conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); + if (!conn) + return nfserr_jukebox; + conn->cn_flags = NFS4_CDFC4_FORE; + svc_xprt_get(rqstp->rq_xprt); + conn->cn_xprt = rqstp->rq_xprt; + + spin_lock(&clp->cl_lock); + list_add(&conn->cn_persession, &ses->se_conns); + spin_unlock(&clp->cl_lock); + + return nfs_ok; +} + +static void free_conn(struct nfsd4_conn *c) +{ + svc_xprt_put(c->cn_xprt); + kfree(c); +} + +void free_session(struct kref *kref) +{ + struct nfsd4_session *ses; + int mem; + + ses = container_of(kref, struct nfsd4_session, se_ref); + while (!list_empty(&ses->se_conns)) { + struct nfsd4_conn *c; + c = list_first_entry(&ses->se_conns, struct nfsd4_conn, cn_persession); + list_del(&c->cn_persession); + free_conn(c); + } + spin_lock(&nfsd_drc_lock); + mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); + nfsd_drc_mem_used -= mem; + spin_unlock(&nfsd_drc_lock); + free_session_slots(ses); + kfree(ses); +} + + static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new; struct nfsd4_channel_attrs *fchan = &cses->fore_channel; int numslots, slotsize; + int status; int idx; /* @@ -654,6 +701,8 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp 
memcpy(clp->cl_sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); + INIT_LIST_HEAD(&new->se_conns); + new->se_flags = cses->flags; kref_init(&new->se_ref); idx = hash_sessionid(&new->se_sessionid); @@ -662,6 +711,11 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&client_lock); + status = nfsd4_new_conn(rqstp, new); + if (status) { + free_session(&new->se_ref); + return nfserr_jukebox; + } return nfs_ok; } @@ -694,21 +748,6 @@ unhash_session(struct nfsd4_session *ses) list_del(&ses->se_perclnt); } -void -free_session(struct kref *kref) -{ - struct nfsd4_session *ses; - int mem; - - ses = container_of(kref, struct nfsd4_session, se_ref); - spin_lock(&nfsd_drc_lock); - mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); - nfsd_drc_mem_used -= mem; - spin_unlock(&nfsd_drc_lock); - free_session_slots(ses); - kfree(ses); -} - /* must be called under the client_lock */ static inline void renew_client_locked(struct nfs4_client *clp) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 58bc2a63ca14..29413c2ed270 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -152,6 +152,13 @@ struct nfsd4_clid_slot { struct nfsd4_create_session sl_cr_ses; }; +struct nfsd4_conn { + struct list_head cn_persession; + struct svc_xprt *cn_xprt; +/* CDFC4_FORE, CDFC4_BACK: */ + unsigned char cn_flags; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -161,6 +168,7 @@ struct nfsd4_session { struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; + struct list_head se_conns; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 07e40c625972..79b15fb2f304 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -61,6 +61,9 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 
#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_CDFC4_FORE 0x1 +#define NFS4_CDFC4_BACK 0x2 + #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 From edc7a894034acb4c7ff8305716ca5df8aaf8e642 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 22 Mar 2010 15:37:17 -0400 Subject: [PATCH 53/99] nfsd: provide callbacks on svc_xprt deletion NFSv4.1 needs warning when a client tcp connection goes down, if that connection is being used as a backchannel, so that it can warn the client that it has lost the backchannel connection. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 25 +++++++++++++++++++++++++ net/sunrpc/svc_xprt.c | 15 +++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index bb182979569e..bbdb680ffbe9 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -33,6 +33,16 @@ struct svc_xprt_class { u32 xcl_max_payload; }; +/* + * This is embedded in an object that wants a callback before deleting + * an xprt; intended for use by NFSv4.1, which needs to know when a + * client's tcp connection (and hence possibly a backchannel) goes away. 
+ */ +struct svc_xpt_user { + struct list_head list; + void (*callback)(struct svc_xpt_user *); +}; + struct svc_xprt { struct svc_xprt_class *xpt_class; struct svc_xprt_ops *xpt_ops; @@ -67,10 +77,25 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ + struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; }; +static inline void register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) +{ + spin_lock(&xpt->xpt_lock); + list_add(&u->list, &xpt->xpt_users); + spin_unlock(&xpt->xpt_lock); +} + +static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) +{ + spin_lock(&xpt->xpt_lock); + list_del_init(&u->list); + spin_unlock(&xpt->xpt_lock); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 678b6ee4da7b..12025eedc781 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -156,6 +156,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, INIT_LIST_HEAD(&xprt->xpt_list); INIT_LIST_HEAD(&xprt->xpt_ready); INIT_LIST_HEAD(&xprt->xpt_deferred); + INIT_LIST_HEAD(&xprt->xpt_users); mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); @@ -881,6 +882,19 @@ static void svc_age_temp_xprts(unsigned long closure) mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } +static void call_xpt_users(struct svc_xprt *xprt) +{ + struct svc_xpt_user *u; + + spin_lock(&xprt->xpt_lock); + while (!list_empty(&xprt->xpt_users)) { + u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); + list_del(&u->list); + u->callback(u); + } + spin_unlock(&xprt->xpt_lock); +} + /* * Remove a dead transport */ @@ -913,6 +927,7 @@ 
void svc_delete_xprt(struct svc_xprt *xprt) while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); + call_xpt_users(xprt); svc_xprt_put(xprt); } From 19cf5c026f3ee06027523e59478e3fa54f573e5e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 6 Jun 2010 18:37:16 -0400 Subject: [PATCH 54/99] nfsd4: use callbacks on svc_xprt_deletion Remove connections from the list when they go down. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 51 +++++++++++++++++++++++++++++++++++++-------- fs/nfsd/state.h | 3 +++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c7c1a7afa197..b7e9793b58f5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -625,6 +625,25 @@ static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); } +static void free_conn(struct nfsd4_conn *c) +{ + svc_xprt_put(c->cn_xprt); + kfree(c); +} + +static void nfsd4_conn_lost(struct svc_xpt_user *u) +{ + struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); + struct nfs4_client *clp = c->cn_session->se_client; + + spin_lock(&clp->cl_lock); + if (!list_empty(&c->cn_persession)) { + list_del(&c->cn_persession); + free_conn(c); + } + spin_unlock(&clp->cl_lock); +} + static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; @@ -636,18 +655,34 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) conn->cn_flags = NFS4_CDFC4_FORE; svc_xprt_get(rqstp->rq_xprt); conn->cn_xprt = rqstp->rq_xprt; + conn->cn_session = ses; spin_lock(&clp->cl_lock); list_add(&conn->cn_persession, &ses->se_conns); spin_unlock(&clp->cl_lock); + conn->cn_xpt_user.callback = nfsd4_conn_lost; + register_xpt_user(rqstp->rq_xprt, &conn->cn_xpt_user); return nfs_ok; } -static void free_conn(struct nfsd4_conn *c) +static void nfsd4_del_conns(struct nfsd4_session 
*s) { - svc_xprt_put(c->cn_xprt); - kfree(c); + struct nfs4_client *clp = s->se_client; + struct nfsd4_conn *c; + + spin_lock(&clp->cl_lock); + while (!list_empty(&s->se_conns)) { + c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); + list_del_init(&c->cn_persession); + spin_unlock(&clp->cl_lock); + + unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); + free_conn(c); + + spin_lock(&clp->cl_lock); + } + spin_unlock(&clp->cl_lock); } void free_session(struct kref *kref) @@ -656,12 +691,7 @@ void free_session(struct kref *kref) int mem; ses = container_of(kref, struct nfsd4_session, se_ref); - while (!list_empty(&ses->se_conns)) { - struct nfsd4_conn *c; - c = list_first_entry(&ses->se_conns, struct nfsd4_conn, cn_persession); - list_del(&c->cn_persession); - free_conn(c); - } + nfsd4_del_conns(ses); spin_lock(&nfsd_drc_lock); mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); nfsd_drc_mem_used -= mem; @@ -1552,6 +1582,9 @@ nfsd4_destroy_session(struct svc_rqst *r, /* wait for callbacks */ nfsd4_shutdown_callback(ses->se_client); nfs4_unlock_state(); + + nfsd4_del_conns(ses); + nfsd4_put_session(ses); status = nfs_ok; out: diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 29413c2ed270..8d5e2370cce0 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -35,6 +35,7 @@ #ifndef _NFSD4_STATE_H #define _NFSD4_STATE_H +#include #include #include "nfsfh.h" @@ -155,6 +156,8 @@ struct nfsd4_clid_slot { struct nfsd4_conn { struct list_head cn_persession; struct svc_xprt *cn_xprt; + struct svc_xpt_user cn_xpt_user; + struct nfsd4_session *cn_session; /* CDFC4_FORE, CDFC4_BACK: */ unsigned char cn_flags; }; From db90681d6eff89efc1eee523e1cb77eb632a6cf7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 29 Sep 2010 15:29:32 -0400 Subject: [PATCH 55/99] nfsd4: refactor connection allocation Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b7e9793b58f5..3b4d74cbb6c8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -644,25 +644,45 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) spin_unlock(&clp->cl_lock); } -static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp) { - struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *conn; conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); if (!conn) - return nfserr_jukebox; - conn->cn_flags = NFS4_CDFC4_FORE; + return NULL; svc_xprt_get(rqstp->rq_xprt); conn->cn_xprt = rqstp->rq_xprt; - conn->cn_session = ses; + conn->cn_flags = NFS4_CDFC4_FORE; + INIT_LIST_HEAD(&conn->cn_xpt_user.list); + return conn; +} + +static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; spin_lock(&clp->cl_lock); + conn->cn_session = ses; list_add(&conn->cn_persession, &ses->se_conns); spin_unlock(&clp->cl_lock); +} +static void nfsd4_register_conn(struct nfsd4_conn *conn) +{ conn->cn_xpt_user.callback = nfsd4_conn_lost; - register_xpt_user(rqstp->rq_xprt, &conn->cn_xpt_user); + register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); +} + +static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +{ + struct nfsd4_conn *conn; + + conn = alloc_conn(rqstp); + if (!conn) + return nfserr_jukebox; + nfsd4_hash_conn(conn, ses); + nfsd4_register_conn(conn); return nfs_ok; } From 328ead287220711c3ad4490b1f3f691855df4039 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Wed, 29 Sep 2010 16:11:06 -0400 Subject: [PATCH 56/99] nfsd4: add new connections to session As long as we're not implementing any session security, we should just automatically add any new connections that come along to the list of connections associated with the session. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b4d74cbb6c8..596702e157c9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -658,13 +658,18 @@ static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp) return conn; } +static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + conn->cn_session = ses; + list_add(&conn->cn_persession, &ses->se_conns); +} + static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; spin_lock(&clp->cl_lock); - conn->cn_session = ses; - list_add(&conn->cn_persession, &ses->se_conns); + __nfsd4_hash_conn(conn, ses); spin_unlock(&clp->cl_lock); } @@ -1612,6 +1617,44 @@ nfsd4_destroy_session(struct svc_rqst *r, return status; } +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_rqst *r, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == r->rq_xprt) { + return c; + } + } + return NULL; +} + +static void nfsd4_sequence_check_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_conn *c, *new = NULL; + + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(rqstp, ses); + spin_unlock(&clp->cl_lock); + if (c) + return; + + new = alloc_conn(rqstp); + + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(rqstp, ses); + if (c) { + spin_unlock(&clp->cl_lock); + free_conn(new); + return; + } + __nfsd4_hash_conn(new, ses); + spin_unlock(&clp->cl_lock); + 
nfsd4_register_conn(new); + return; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1656,6 +1699,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out; + nfsd4_sequence_check_conn(rqstp, session); + /* Success! bump slot seqid */ slot->sl_inuse = true; slot->sl_seqid = seq->seqid; From 33515142156efc9ab5dbfe93ff8d4765559dc987 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 2 Oct 2010 18:42:39 -0400 Subject: [PATCH 57/99] nfsd4: return expired on unfound stateid's Commit 78155ed75f470710f2aecb3e75e3d97107ba8374 "nfsd4: distinguish expired from stale stateids" attempted to distinguish expired and stale stateid's using time information that may not have been completely reliable, so I reverted it. That was throwing out the baby with the bathwater; we still do want to return expired, but let's do that using the simpler approach of just assuming any stateid is expired if it looks like it was given out by the current server instance, but we can't find it any more. This may help clients that are recovering from network partitions. Reported-by: Bian Naimeng Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 596702e157c9..02c23b7c5cd5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3046,7 +3046,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (STALE_STATEID(stateid)) goto out; - status = nfserr_bad_stateid; + /* + * We assume that any stateid that has the current boot time, + * but that we can't find, is expired: + */ + status = nfserr_expired; if (is_delegation_stateid(stateid)) { dp = find_delegation_stateid(ino, stateid); if (!dp) @@ -3066,6 +3070,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, stp = find_stateid(stateid, flags); if (!stp) goto out; + status = nfserr_bad_stateid; if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) @@ -3140,8 +3145,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, * a replayed close: */ sop = search_close_lru(stateid->si_stateownerid, flags); + /* It's not stale; let's assume it's expired: */ if (sop == NULL) - return nfserr_bad_stateid; + return nfserr_expired; *sopp = sop; goto check_replay; } @@ -3406,6 +3412,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; if (!is_delegation_stateid(stateid)) goto out; + status = nfserr_expired; dp = find_delegation_stateid(inode, stateid); if (!dp) goto out; From d29068c431599fa96729556846562eb18429092d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Oct 2010 15:29:46 +1100 Subject: [PATCH 58/99] sunrpc: Simplify cache_defer_req and related functions. The return value from cache_defer_req is somewhat confusing. Various different error codes are returned, but the single caller is only interested in success or failure. In fact it can measure this success or failure itself by checking CACHE_PENDING, which makes the point of the code more explicit. 
So change cache_defer_req to return 'void' and test CACHE_PENDING after it completes, to see if the request was actually deferred or not. Similarly setup_deferral and cache_wait_req don't need a return value, so make them void and remove some code. The call to cache_revisit_request (to guard against a race) is only needed for the second call to setup_deferral, so move it out of setup_deferral to after that second call. With the first call the race is handled differently (by explicitly calling 'wait_for_completion'). Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 58 ++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1e72cc955931..49115b107fbd 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -38,7 +38,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static int cache_defer_req(struct cache_req *req, struct cache_head *item); +static void cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -269,7 +269,8 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) { - if (cache_defer_req(rqstp, h) < 0) { + cache_defer_req(rqstp, h); + if (!test_bit(CACHE_PENDING, &h->flags)) { /* Request is not deferred */ rv = cache_is_valid(detail, h); if (rv == -EAGAIN) @@ -525,7 +526,7 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); } -static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) +static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) { struct cache_deferred_req *discard; @@ -547,13 +548,6 @@ static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *it if (discard) /* there was one too many */ discard->revisit(discard, 1); - - if 
(!test_bit(CACHE_PENDING, &item->flags)) { - /* must have just been validated... */ - cache_revisit_request(item); - return -EAGAIN; - } - return 0; } struct thread_deferred_req { @@ -568,18 +562,17 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) complete(&dr->completion); } -static int cache_wait_req(struct cache_req *req, struct cache_head *item) +static void cache_wait_req(struct cache_req *req, struct cache_head *item) { struct thread_deferred_req sleeper; struct cache_deferred_req *dreq = &sleeper.handle; - int ret; sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); dreq->revisit = cache_restart_thread; - ret = setup_deferral(dreq, item); + setup_deferral(dreq, item); - if (ret || + if (!test_bit(CACHE_PENDING, &item->flags) || wait_for_completion_interruptible_timeout( &sleeper.completion, req->thread_wait) <= 0) { /* The completion wasn't completed, so we need @@ -599,41 +592,34 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item) wait_for_completion(&sleeper.completion); } } - if (test_bit(CACHE_PENDING, &item->flags)) { - /* item is still pending, try request - * deferral - */ - return -ETIMEDOUT; - } - /* only return success if we actually deferred the - * request. In this case we waited until it was - * answered so no deferral has happened - rather - * an answer already exists. 
- */ - return -EEXIST; } -static int cache_defer_req(struct cache_req *req, struct cache_head *item) +static void cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; - int ret; - if (cache_defer_cnt >= DFR_MAX) { + if (cache_defer_cnt >= DFR_MAX) /* too much in the cache, randomly drop this one, * or continue and drop the oldest */ if (net_random()&1) - return -ENOMEM; - } + return; + + if (req->thread_wait) { - ret = cache_wait_req(req, item); - if (ret != -ETIMEDOUT) - return ret; + cache_wait_req(req, item); + if (!test_bit(CACHE_PENDING, &item->flags)) + return; } dreq = req->defer(req); if (dreq == NULL) - return -ENOMEM; - return setup_deferral(dreq, item); + return; + setup_deferral(dreq, item); + if (!test_bit(CACHE_PENDING, &item->flags)) + /* Bit could have been cleared before we managed to + * set up the deferral, so need to revisit just in case + */ + cache_revisit_request(item); } static void cache_revisit_request(struct cache_head *item) From e33534d54f1fde3e541f64fa5ad0dd379fc45fa7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Oct 2010 15:29:46 +1100 Subject: [PATCH 59/99] sunrpc/cache: centralise handling of size limit on deferred list. We limit the number of 'defer' requests to DFR_MAX. The imposition of this limit is spread about a bit - sometimes we don't add new things to the list, sometimes we remove old things. Also it is currently applied to requests which we are 'waiting' for rather than 'deferring'. This doesn't seem ideal as 'waiting' requests are naturally limited by the number of threads. So gather the DFR_MAX handling code to one place and only apply it to requests that are actually being deferred. This means that not all 'cache_deferred_req' structures go on the 'cache_defer_list', so we need to be careful when adding and removing things. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 67 +++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 49115b107fbd..ba61d0fa4b80 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -513,22 +513,25 @@ static int cache_defer_cnt; static void __unhash_deferred_req(struct cache_deferred_req *dreq) { - list_del_init(&dreq->recent); hlist_del_init(&dreq->hash); - cache_defer_cnt--; + if (!list_empty(&dreq->recent)) { + list_del_init(&dreq->recent); + cache_defer_cnt--; + } } static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) { int hash = DFR_HASH(item); - list_add(&dreq->recent, &cache_defer_list); + INIT_LIST_HEAD(&dreq->recent); hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); } -static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item) +static void setup_deferral(struct cache_deferred_req *dreq, + struct cache_head *item, + int count_me) { - struct cache_deferred_req *discard; dreq->item = item; @@ -536,18 +539,13 @@ static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *i __hash_deferred_req(dreq, item); - /* it is in, now maybe clean up */ - discard = NULL; - if (++cache_defer_cnt > DFR_MAX) { - discard = list_entry(cache_defer_list.prev, - struct cache_deferred_req, recent); - __unhash_deferred_req(discard); + if (count_me) { + cache_defer_cnt++; + list_add(&dreq->recent, &cache_defer_list); } + spin_unlock(&cache_defer_lock); - if (discard) - /* there was one too many */ - discard->revisit(discard, 1); } struct thread_deferred_req { @@ -570,7 +568,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item) sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); dreq->revisit = cache_restart_thread; - setup_deferral(dreq, item); + setup_deferral(dreq, item, 0); if (!test_bit(CACHE_PENDING, &item->flags) || 
wait_for_completion_interruptible_timeout( @@ -594,18 +592,37 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item) } } +static void cache_limit_defers(void) +{ + /* Make sure we haven't exceed the limit of allowed deferred + * requests. + */ + struct cache_deferred_req *discard = NULL; + + if (cache_defer_cnt <= DFR_MAX) + return; + + spin_lock(&cache_defer_lock); + + /* Consider removing either the first or the last */ + if (cache_defer_cnt > DFR_MAX) { + if (net_random() & 1) + discard = list_entry(cache_defer_list.next, + struct cache_deferred_req, recent); + else + discard = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); + __unhash_deferred_req(discard); + } + spin_unlock(&cache_defer_lock); + if (discard) + discard->revisit(discard, 1); +} + static void cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; - if (cache_defer_cnt >= DFR_MAX) - /* too much in the cache, randomly drop this one, - * or continue and drop the oldest - */ - if (net_random()&1) - return; - - if (req->thread_wait) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) @@ -614,12 +631,14 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) dreq = req->defer(req); if (dreq == NULL) return; - setup_deferral(dreq, item); + setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to * set up the deferral, so need to revisit just in case */ cache_revisit_request(item); + + cache_limit_defers(); } static void cache_revisit_request(struct cache_head *item) From 70dc78da2cc76e7804c597fea3a0aff67ec8d7ad Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 5 Oct 2010 20:48:02 +0400 Subject: [PATCH 60/99] sunrpc: Use helper to set v4 mapped addr in ip_map_parse Signed-off-by: Pavel Emelyanov Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcauth_unix.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 8b378f91f255..560677d187f1 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -220,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd, switch (address.sa.sa_family) { case AF_INET: /* Form a mapped IPv4 address in sin6 */ - memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; - sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; + ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr, + &sin6.sin6_addr); break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: From ecec6e34e18660799444c5a163c7313a20fba701 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 Oct 2010 16:49:44 -0400 Subject: [PATCH 61/99] nfsd4: expire clients more promptly Expire clients more promptly, at the expense of possibly running the laundromat thread more frequently. Though it's not the default, I'd like it to be feasible to run with a lease time of just a few seconds, at which point a minimum 10 second wait between laundromat runs seems a little much. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index b76ac3a82e39..6b641cf2c19a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -249,7 +249,7 @@ extern time_t nfsd4_grace; #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ -#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ +#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ /* * The following attributes are currently not supported by the NFSv4 server: From b432e6b3d9c1b4271c43f02b45136f33a8ed5820 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 12 Oct 2010 15:33:52 -0500 Subject: [PATCH 62/99] svcrdma: Change DMA mapping logic to avoid the page_address kernel API There was logic in the send path that assumed that a page containing data to send to the client has a KVA. This is not always the case and can result in data corruption when page_address returns zero and we end up DMA mapping zero. This patch changes the bus mapping logic to avoid page_address() where necessary and converts all calls from ib_dma_map_single to ib_dma_map_page in order to keep the map/unmap calls symmetric. Signed-off-by: Tom Tucker Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 18 +++--- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 80 +++++++++++++++++------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 18 +++--- 3 files changed, 78 insertions(+), 38 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0194de814933..926bdb44f3de 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; for (page_no = 0; page_no < frmr->page_list_len; page_no++) { frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + rqstp->rq_arg.pages[page_no], 0, + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, int count) { int i; + unsigned long off; ctxt->count = count; ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { ctxt->sge[i].length = 0; /* in case map fails */ if (!frmr) { + BUG_ON(0 == virt_to_page(vec[i].iov_base)); + off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; ctxt->sge[i].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - vec[i].iov_base, - vec[i].iov_len, - DMA_FROM_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + virt_to_page(vec[i].iov_base), + off, + vec[i].iov_len, + DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[i].addr)) return -EINVAL; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b15e1ebb2bfa..d4f5e0e43f09 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -70,8 +70,8 @@ * on extra page for the RPCRMDA 
header. */ static int fast_reg_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { int sge_no; u32 sge_bytes; @@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, vec->count = 2; sge_no++; - /* Build the FRMR */ + /* Map the XDR head */ frmr->kva = frva; frmr->direction = DMA_TO_DEVICE; frmr->access_flags = 0; frmr->map_len = PAGE_SIZE; frmr->page_list_len = 1; + page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - (void *)xdr->head[0].iov_base, - PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + virt_to_page(xdr->head[0].iov_base), + page_off, + PAGE_SIZE - page_off, + DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); + /* Map the XDR page list */ page_off = xdr->page_base; page_bytes = xdr->page_len + page_off; if (!page_bytes) @@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - page_address(page), - PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + page, page_off, + sge_bytes, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, - DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), + page_off, + PAGE_SIZE, + DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma 
*xprt, return 0; } +static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + u32 xdr_off, size_t len, int dir) +{ + struct page *page; + dma_addr_t dma_addr; + if (xdr_off < xdr->head[0].iov_len) { + /* This offset is in the head */ + xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; + page = virt_to_page(xdr->head[0].iov_base); + } else { + xdr_off -= xdr->head[0].iov_len; + if (xdr_off < xdr->page_len) { + /* This offset is in the page list */ + page = xdr->pages[xdr_off >> PAGE_SHIFT]; + xdr_off &= ~PAGE_MASK; + } else { + /* This offset is in the tail */ + xdr_off -= xdr->page_len; + xdr_off += (unsigned long) + xdr->tail[0].iov_base & ~PAGE_MASK; + page = virt_to_page(xdr->tail[0].iov_base); + } + } + dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off, + min_t(size_t, PAGE_SIZE, len), dir); + return dma_addr; +} + /* Assumptions: * - We are using FRMR * - or - @@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge[sge_no].length = sge_bytes; if (!vec->frmr) { sge[sge_no].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - (void *) - vec->sge[xdr_sge_no].iov_base + sge_off, - sge_bytes, DMA_TO_DEVICE); + dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge[sge_no].addr)) goto err; @@ -494,7 +528,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, * In all three cases, this function prepares the RPCRDMA header in * sge[0], the 'type' parameter indicates the type to place in the * RPCRDMA header, and the 'byte_count' field indicates how much of - * the XDR to include in this RDMA_SEND. + * the XDR to include in this RDMA_SEND. NB: The offset of the payload + * to send is zero in the XDR. 
*/ static int send_reply(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, @@ -536,23 +571,24 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[0].lkey = rdma->sc_dma_lkey; ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), - ctxt->sge[0].length, DMA_TO_DEVICE); + ib_dma_map_page(rdma->sc_cm_id->device, page, 0, + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - /* Determine how many of our SGE are to be transmitted */ + /* Map the payload indicated by 'byte_count' */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { + int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; if (!vec->frmr) { ctxt->sge[sge_no].addr = - ib_dma_map_single(rdma->sc_cm_id->device, - vec->sge[sge_no].iov_base, - sge_bytes, DMA_TO_DEVICE); + dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[sge_no].addr)) goto err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 950a206600c0..e87e000e984c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -121,7 +121,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) */ if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(xprt->sc_cm_id->device, + ib_dma_unmap_page(xprt->sc_cm_id->device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); @@ -503,8 +503,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_single(xprt->sc_cm_id->device, - page_address(page), PAGE_SIZE, + 
pa = ib_dma_map_page(xprt->sc_cm_id->device, + page, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -800,8 +800,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt, if (ib_dma_mapping_error(frmr->mr->device, addr)) continue; atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); + ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); } } @@ -1276,7 +1276,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth); if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) - return 0; + return -ENOTCONN; continue; } /* Take a transport ref for each WR posted */ @@ -1322,8 +1322,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, - page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); + sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, + p, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { put_page(p); return; @@ -1350,7 +1350,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_single(xprt->sc_cm_id->device, + ib_dma_unmap_page(xprt->sc_cm_id->device, sge.addr, PAGE_SIZE, DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); From 4a84386fc27fdc7d2ea69fdbc641008e8f943159 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 12 Oct 2010 15:33:57 -0500 Subject: [PATCH 63/99] svcrdma: Cleanup DMA unmapping in error paths. There are several error paths in the code that do not unmap DMA. This patch adds calls to svc_rdma_unmap_dma to free these DMA contexts. Signed-off-by: Tom Tucker Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 1 + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 ++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 29 ++++++++++++------------ 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 926bdb44f3de..df67211c4baf 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -495,6 +495,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", err); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); goto out; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d4f5e0e43f09..249a835b703f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -367,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, goto err; return 0; err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_frmr(xprt, vec->frmr); svc_rdma_put_context(ctxt, 0); /* Fatal error, close transport */ return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e87e000e984c..22f65cc46fe5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -512,9 +512,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; + ctxt->count = sge_no + 1; buflen += PAGE_SIZE; } - ctxt->count = sge_no; recv_wr.next = NULL; recv_wr.sg_list = &ctxt->sge[0]; recv_wr.num_sge = ctxt->count; @@ -530,6 +530,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) return ret; err_put_ctxt: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); return -ENOMEM; } @@ -1308,7 +1309,6 @@ void svc_rdma_send_error(struct 
svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, enum rpcrdma_errcode err) { struct ib_send_wr err_wr; - struct ib_sge sge; struct page *p; struct svc_rdma_op_ctxt *ctxt; u32 *va; @@ -1321,26 +1321,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* XDR encode error */ length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); + ctxt = svc_rdma_get_context(xprt); + ctxt->direction = DMA_FROM_DEVICE; + ctxt->count = 1; + ctxt->pages[0] = p; + /* Prepare SGE for local address */ - sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { + ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device, + p, 0, length, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); return; } atomic_inc(&xprt->sc_dma_used); - sge.lkey = xprt->sc_dma_lkey; - sge.length = length; - - ctxt = svc_rdma_get_context(xprt); - ctxt->count = 1; - ctxt->pages[0] = p; + ctxt->sge[0].lkey = xprt->sc_dma_lkey; + ctxt->sge[0].length = length; /* Prepare SEND WR */ memset(&err_wr, 0, sizeof err_wr); ctxt->wr_op = IB_WR_SEND; err_wr.wr_id = (unsigned long)ctxt; - err_wr.sg_list = &sge; + err_wr.sg_list = ctxt->sge; err_wr.num_sge = 1; err_wr.opcode = IB_WR_SEND; err_wr.send_flags = IB_SEND_SIGNALED; @@ -1350,9 +1351,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_page(xprt->sc_cm_id->device, - sge.addr, PAGE_SIZE, - DMA_FROM_DEVICE); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); } } From 5d4ec932972a0dd5486c59909e62dc62105d065c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:51:23 +0400 Subject: [PATCH 64/99] sunrpc: Remove unused sock arg from xs_get_srcport Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f9964ef35e3e..304e2de2c70c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1515,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) xs_update_peer_port(xprt); } -static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) +static unsigned short xs_get_srcport(struct sock_xprt *transport) { unsigned short port = transport->srcport; @@ -1542,7 +1542,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) }; struct sockaddr_in *sa; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport, sock); + unsigned short port = xs_get_srcport(transport); unsigned short last; sa = (struct sockaddr_in *)&transport->srcaddr; @@ -1575,7 +1575,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) }; struct sockaddr_in6 *sa; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport, sock); + unsigned short port = xs_get_srcport(transport); unsigned short last; sa = (struct sockaddr_in6 *)&transport->srcaddr; From baaf4e487a9c42b345bde14698fd566f864c9287 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:51:56 +0400 Subject: [PATCH 65/99] sunrpc: Remove unused sock arg from xs_next_srcport Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 304e2de2c70c..024a64443203 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1524,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport) return port; } -static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) +static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { if (transport->srcport != 0) transport->srcport = 0; @@ -1558,7 +1558,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) break; } last = port; - port = xs_next_srcport(transport, sock, port); + port = xs_next_srcport(transport, port); if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); @@ -1591,7 +1591,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) break; } last = port; - port = xs_next_srcport(transport, sock, port); + port = xs_next_srcport(transport, port); if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); From a9f5f0f7bf72f3f1451e844681fb3cb5d0b1c80d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:52:25 +0400 Subject: [PATCH 66/99] sunrpc: Get xprt pointer once in xs_tcp_setup_socket Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 024a64443203..a76446a4d670 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1851,12 +1851,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * * Invoked by a work queue tasklet. 
*/ -static void xs_tcp_setup_socket(struct rpc_xprt *xprt, - struct sock_xprt *transport, +static void xs_tcp_setup_socket(struct sock_xprt *transport, struct socket *(*create_sock)(struct rpc_xprt *, struct sock_xprt *)) { struct socket *sock = transport->sock; + struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; if (xprt->shutdown) @@ -1958,9 +1958,8 @@ static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); + xs_tcp_setup_socket(transport, xs_create_tcp_sock4); } static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, @@ -1997,9 +1996,8 @@ static void xs_tcp_connect_worker6(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); + xs_tcp_setup_socket(transport, xs_create_tcp_sock6); } /** From 58dddac9c55c604f01152832c1c3d2c17a5adea9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:52:55 +0400 Subject: [PATCH 67/99] sunrpc: Remove duplicate xprt/transport arguments from calls The xs_tcp_reuse_connection takes the xprt only to pass it down to the xs_abort_connection. The latter one can get it from the given transport itself. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- net/sunrpc/xprtsock.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a76446a4d670..8ff57c59b445 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1754,12 +1754,12 @@ static void xs_udp_connect_worker6(struct work_struct *work) * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +static void xs_abort_connection(struct sock_xprt *transport) { int result; struct sockaddr any; - dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); + dprintk("RPC: disconnecting xprt %p to reuse port\n", transport); /* * Disconnect the transport socket by doing a connect operation @@ -1769,13 +1769,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); if (!result) - xs_sock_mark_closed(xprt); + xs_sock_mark_closed(&transport->xprt); else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +static void xs_tcp_reuse_connection(struct sock_xprt *transport) { unsigned int state = transport->inet->sk_state; @@ -1798,7 +1798,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra "sk_shutdown set to %d\n", __func__, transport->inet->sk_shutdown); } - xs_abort_connection(xprt, transport); + xs_abort_connection(transport); } static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -1875,7 +1875,7 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt, 
transport); + xs_tcp_reuse_connection(transport); if (abort_and_exit) goto out_eagain; From b65c0310611af73569f94c526a1e2323d99b380a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:53:46 +0400 Subject: [PATCH 68/99] sunrpc: Factor out udp sockets creation Make it look like the TCP sockets creation. Unfortunately the git diff made the patch look messy :( Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 102 ++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ff57c59b445..df53dc55841d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1660,37 +1660,22 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_udp_do_set_buffer_size(xprt); } -/** - * xs_udp_connect_worker4 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. 
- */ -static void xs_udp_connect_worker4(struct work_struct *work) +static void xs_udp_setup_socket(struct sock_xprt *transport, + struct socket *(*create_sock)(struct rpc_xprt *, + struct sock_xprt *)) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; /* Start by resetting any existing state */ xs_reset_transport(transport); - - err = __sock_create(xprt->xprt_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + sock = create_sock(xprt, transport); + if (IS_ERR(sock)) goto out; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock)) { - sock_release(sock); - goto out; - } dprintk("RPC: worker connecting xprt %p via %s to " "%s (port %s)\n", xprt, @@ -1705,25 +1690,56 @@ static void xs_udp_connect_worker4(struct work_struct *work) xprt_wake_pending_tasks(xprt, status); } +/** + * xs_udp_connect_worker4 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. 
+ */ + +static struct socket *xs_create_udp_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + err = __sock_create(xprt->xprt_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock)) { + sock_release(sock); + goto out; + } + + return sock; +out: + return ERR_PTR(err); +} + +static void xs_udp_connect_worker4(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + + xs_udp_setup_socket(transport, xs_create_udp_sock4); +} + /** * xs_udp_connect_worker6 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker6(struct work_struct *work) + +static struct socket *xs_create_udp_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - - if (xprt->shutdown) - goto out; - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + struct socket *sock; + int err; err = __sock_create(xprt->xprt_net, PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); if (err < 0) { @@ -1737,17 +1753,17 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p via %s to " - "%s (port %s)\n", xprt, - xprt->address_strings[RPC_DISPLAY_PROTO], - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - - xs_udp_finish_connecting(xprt, sock); - status = 0; + return sock; out: - xprt_clear_connecting(xprt); - xprt_wake_pending_tasks(xprt, status); + return ERR_PTR(err); +} + +static void xs_udp_connect_worker6(struct work_struct *work) +{ + 
struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + + xs_udp_setup_socket(transport, xs_create_udp_sock6); } /* From 22f793268de3b4dff8abfcd873ba7afc1f34224f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:54:26 +0400 Subject: [PATCH 69/99] sunrpc: Factor out v4 sockets creation The UDPv4 and TCPv4 socket creation callbacks now look very similar. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 63 ++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index df53dc55841d..b73a605c0847 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1631,6 +1631,30 @@ static inline void xs_reclassify_socket6(struct socket *sock) } #endif +static struct socket *xs_create_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport, int type, int protocol) +{ + struct socket *sock; + int err; + + err = __sock_create(xprt->xprt_net, PF_INET, type, protocol, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create %d transport socket (%d).\n", + protocol, -err); + goto out; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock)) { + sock_release(sock); + goto out; + } + + return sock; +out: + return ERR_PTR(err); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1700,24 +1724,7 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, static struct socket *xs_create_udp_sock4(struct rpc_xprt *xprt, struct sock_xprt *transport) { - struct socket *sock; - int err; - - err = __sock_create(xprt->xprt_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock)) { - 
sock_release(sock); - goto out; - } - - return sock; -out: - return ERR_PTR(err); + return xs_create_sock4(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); } static void xs_udp_connect_worker4(struct work_struct *work) @@ -1943,25 +1950,7 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, struct sock_xprt *transport) { - struct socket *sock; - int err; - - /* start from scratch */ - err = __sock_create(xprt->xprt_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", - -err); - goto out_err; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); - goto out_err; - } - return sock; -out_err: - return ERR_PTR(-EIO); + return xs_create_sock4(xprt, transport, SOCK_STREAM, IPPROTO_TCP); } /** From 22d44a7d8a03456aa6d0a047c051aa28728e6ecd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:54:55 +0400 Subject: [PATCH 70/99] sunrpc: Factor out v6 sockets creation Same patch for v6 protocols. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 63 ++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b73a605c0847..96128d0fd8d2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1655,6 +1655,30 @@ static struct socket *xs_create_sock4(struct rpc_xprt *xprt, return ERR_PTR(err); } +static struct socket *xs_create_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport, int type, int protocol) +{ + struct socket *sock; + int err; + + err = __sock_create(xprt->xprt_net, PF_INET6, type, protocol, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create %d transport socket (%d).\n", + protocol, -err); + goto out; + } + xs_reclassify_socket6(sock); + + if (xs_bind6(transport, sock)) { + sock_release(sock); + goto out; + } + + return sock; +out: + return ERR_PTR(err); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1745,24 +1769,7 @@ static void xs_udp_connect_worker4(struct work_struct *work) static struct socket *xs_create_udp_sock6(struct rpc_xprt *xprt, struct sock_xprt *transport) { - struct socket *sock; - int err; - - err = __sock_create(xprt->xprt_net, PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); - - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - - return sock; -out: - return ERR_PTR(err); + return xs_create_sock6(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); } static void xs_udp_connect_worker6(struct work_struct *work) @@ -1970,25 +1977,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, struct sock_xprt *transport) { - struct socket *sock; - int err; - - /* start from scratch */ - err = 
__sock_create(xprt->xprt_net, PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", - -err); - goto out_err; - } - xs_reclassify_socket6(sock); - - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out_err; - } - return sock; -out_err: - return ERR_PTR(-EIO); + return xs_create_sock6(xprt, transport, SOCK_STREAM, IPPROTO_TCP); } /** From 573018c07e040b2c3f3cb8251f66fa4a5cb7425d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:55:38 +0400 Subject: [PATCH 71/99] sunrpc: Call xs_create_sockX directly from setup_socket Remove now unneeded wrappers that just add type and protocol to socket creation callback. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 96128d0fd8d2..7fdf2bb956a9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1710,7 +1710,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) static void xs_udp_setup_socket(struct sock_xprt *transport, struct socket *(*create_sock)(struct rpc_xprt *, - struct sock_xprt *)) + struct sock_xprt *, int type, int protocol)) { struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; @@ -1721,7 +1721,7 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, /* Start by resetting any existing state */ xs_reset_transport(transport); - sock = create_sock(xprt, transport); + sock = create_sock(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); if (IS_ERR(sock)) goto out; @@ -1745,18 +1745,12 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, * Invoked by a work queue tasklet. 
*/ -static struct socket *xs_create_udp_sock4(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - return xs_create_sock4(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); -} - static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_udp_setup_socket(transport, xs_create_udp_sock4); + xs_udp_setup_socket(transport, xs_create_sock4); } /** @@ -1766,18 +1760,12 @@ static void xs_udp_connect_worker4(struct work_struct *work) * Invoked by a work queue tasklet. */ -static struct socket *xs_create_udp_sock6(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - return xs_create_sock6(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); -} - static void xs_udp_connect_worker6(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_udp_setup_socket(transport, xs_create_udp_sock6); + xs_udp_setup_socket(transport, xs_create_sock6); } /* @@ -1883,7 +1871,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) */ static void xs_tcp_setup_socket(struct sock_xprt *transport, struct socket *(*create_sock)(struct rpc_xprt *, - struct sock_xprt *)) + struct sock_xprt *, int type, int protocol)) { struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; @@ -1894,7 +1882,7 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - sock = create_sock(xprt, transport); + sock = create_sock(xprt, transport, SOCK_STREAM, IPPROTO_TCP); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -1954,12 +1942,6 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, xprt_wake_pending_tasks(xprt, status); } -static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - return xs_create_sock4(xprt, transport, SOCK_STREAM, IPPROTO_TCP); -} - /** * xs_tcp_connect_worker4 - 
connect a TCP socket to a remote endpoint * @work: RPC transport to connect @@ -1971,13 +1953,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_tcp_setup_socket(transport, xs_create_tcp_sock4); -} - -static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - return xs_create_sock6(xprt, transport, SOCK_STREAM, IPPROTO_TCP); + xs_tcp_setup_socket(transport, xs_create_sock4); } /** @@ -1991,7 +1967,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_tcp_setup_socket(transport, xs_create_tcp_sock6); + xs_tcp_setup_socket(transport, xs_create_sock6); } /** From beb59b68280d9779cc16591115547678d1c74a66 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 5 Oct 2010 15:53:08 +0400 Subject: [PATCH 72/99] sunrpc: Merge the xs_bind code There is only one difference between xs_bind4 and xs_bind6 - the size of the sockaddr structure they use. Fortunately, its size can be obtained indirectly from the transport. Change since v1: * use sockaddr_storage instead of sockaddr * use rpc_set_port instead of manual port assigning Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever [bfields@redhat.com: fix address family initialization] Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 66 +++++++++++++------------------------------ 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7fdf2bb956a9..fc1e76788120 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1534,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned shor return xprt_max_resvport; return --port; } - -static int xs_bind4(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { - struct sockaddr_in myaddr = { - .sin_family = AF_INET, - }; - struct sockaddr_in *sa; + struct sockaddr_storage myaddr; int err, nloop = 0; unsigned short port = xs_get_srcport(transport); unsigned short last; - sa = (struct sockaddr_in *)&transport->srcaddr; - myaddr.sin_addr = sa->sin_addr; + memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { - myaddr.sin_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); + rpc_set_port((struct sockaddr *)&myaddr, port); + err = kernel_bind(sock, (struct sockaddr *)&myaddr, + transport->xprt.addrlen); if (port == 0) break; if (err == 0) { @@ -1562,43 +1557,18 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: %s %pI4:%u: %s (%d)\n", - __func__, &myaddr.sin_addr, - port, err ? "failed" : "ok", err); + + if (myaddr.ss_family == PF_INET) + dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__, + &((struct sockaddr_in *)&myaddr)->sin_addr, + port, err ? "failed" : "ok", err); + else + dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__, + &((struct sockaddr_in6 *)&myaddr)->sin6_addr, + port, err ? 
"failed" : "ok", err); return err; } -static int xs_bind6(struct sock_xprt *transport, struct socket *sock) -{ - struct sockaddr_in6 myaddr = { - .sin6_family = AF_INET6, - }; - struct sockaddr_in6 *sa; - int err, nloop = 0; - unsigned short port = xs_get_srcport(transport); - unsigned short last; - - sa = (struct sockaddr_in6 *)&transport->srcaddr; - myaddr.sin6_addr = sa->sin6_addr; - do { - myaddr.sin6_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); - if (port == 0) - break; - if (err == 0) { - transport->srcport = port; - break; - } - last = port; - port = xs_next_srcport(transport, port); - if (port > last) - nloop++; - } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", - &myaddr.sin6_addr, port, err ? "failed" : "ok", err); - return err; -} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; @@ -1643,9 +1613,10 @@ static struct socket *xs_create_sock4(struct rpc_xprt *xprt, protocol, -err); goto out; } + transport->srcaddr.ss_family = AF_INET; xs_reclassify_socket4(sock); - if (xs_bind4(transport, sock)) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1667,9 +1638,10 @@ static struct socket *xs_create_sock6(struct rpc_xprt *xprt, protocol, -err); goto out; } + transport->srcaddr.ss_family = AF_INET6; xs_reclassify_socket6(sock); - if (xs_bind6(transport, sock)) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } From 6bc9638ab495516f8a34d2ae48f2f43f145e186f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:56:38 +0400 Subject: [PATCH 73/99] sunrpc: Merge xs_create_sock code After xs_bind is merged it's easy to merge its callers. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever [bfields@redhat.com: fix address family initialization] Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 49 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fc1e76788120..324d97ae71ab 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1591,6 +1591,14 @@ static inline void xs_reclassify_socket6(struct socket *sock) sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } + +static inline void xs_reclassify_socket(int family, struct socket *sock) +{ + if (family == PF_INET) + xs_reclassify_socket4(sock); + else + xs_reclassify_socket6(sock); +} #else static inline void xs_reclassify_socket4(struct socket *sock) { @@ -1599,22 +1607,26 @@ static inline void xs_reclassify_socket4(struct socket *sock) static inline void xs_reclassify_socket6(struct socket *sock) { } + +static inline void xs_reclassify_socket(int family, struct socket *sock) +{ +} #endif -static struct socket *xs_create_sock4(struct rpc_xprt *xprt, - struct sock_xprt *transport, int type, int protocol) +static struct socket *xs_create_sock(struct rpc_xprt *xprt, + struct sock_xprt *transport, int family, int type, int protocol) { struct socket *sock; int err; - err = __sock_create(xprt->xprt_net, PF_INET, type, protocol, &sock, 1); + err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1); if (err < 0) { dprintk("RPC: can't create %d transport socket (%d).\n", protocol, -err); goto out; } - transport->srcaddr.ss_family = AF_INET; - xs_reclassify_socket4(sock); + transport->srcaddr.ss_family = family; + xs_reclassify_socket(family, sock); if (xs_bind(transport, sock)) { sock_release(sock); @@ -1626,29 +1638,16 @@ static struct socket *xs_create_sock4(struct rpc_xprt *xprt, return ERR_PTR(err); } +static struct socket *xs_create_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport, int type, int protocol) +{ + return xs_create_sock(xprt, transport, PF_INET, type, 
protocol); +} + static struct socket *xs_create_sock6(struct rpc_xprt *xprt, struct sock_xprt *transport, int type, int protocol) { - struct socket *sock; - int err; - - err = __sock_create(xprt->xprt_net, PF_INET6, type, protocol, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create %d transport socket (%d).\n", - protocol, -err); - goto out; - } - transport->srcaddr.ss_family = AF_INET6; - xs_reclassify_socket6(sock); - - if (xs_bind(transport, sock)) { - sock_release(sock); - goto out; - } - - return sock; -out: - return ERR_PTR(err); + return xs_create_sock(xprt, transport, PF_INET6, type, protocol); } static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) From 7dfe1fc36278c3aa0db29356c491db6353678e98 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:57:11 +0400 Subject: [PATCH 74/99] sunrpc: Pass family to setup_socket calls Now we have a single socket creation routine and can call it directly from the setup_socket routines. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 324d97ae71ab..3dbc5d631d66 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1638,18 +1638,6 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, return ERR_PTR(err); } -static struct socket *xs_create_sock4(struct rpc_xprt *xprt, - struct sock_xprt *transport, int type, int protocol) -{ - return xs_create_sock(xprt, transport, PF_INET, type, protocol); -} - -static struct socket *xs_create_sock6(struct rpc_xprt *xprt, - struct sock_xprt *transport, int type, int protocol) -{ - return xs_create_sock(xprt, transport, PF_INET6, type, protocol); -} - static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1679,9 +1667,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_udp_do_set_buffer_size(xprt); } -static void xs_udp_setup_socket(struct sock_xprt *transport, - struct socket *(*create_sock)(struct rpc_xprt *, - struct sock_xprt *, int type, int protocol)) +static void xs_udp_setup_socket(struct sock_xprt *transport, int family) { struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; @@ -1692,7 +1678,7 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, /* Start by resetting any existing state */ xs_reset_transport(transport); - sock = create_sock(xprt, transport, SOCK_DGRAM, IPPROTO_UDP); + sock = xs_create_sock(xprt, transport, family, SOCK_DGRAM, IPPROTO_UDP); if (IS_ERR(sock)) goto out; @@ -1721,7 +1707,7 @@ static void xs_udp_connect_worker4(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_udp_setup_socket(transport, xs_create_sock4); + xs_udp_setup_socket(transport, PF_INET); } /** @@ -1736,7 +1722,7 
@@ static void xs_udp_connect_worker6(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_udp_setup_socket(transport, xs_create_sock6); + xs_udp_setup_socket(transport, PF_INET6); } /* @@ -1840,9 +1826,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * * Invoked by a work queue tasklet. */ -static void xs_tcp_setup_socket(struct sock_xprt *transport, - struct socket *(*create_sock)(struct rpc_xprt *, - struct sock_xprt *, int type, int protocol)) +static void xs_tcp_setup_socket(struct sock_xprt *transport, int family) { struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; @@ -1853,7 +1837,7 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - sock = create_sock(xprt, transport, SOCK_STREAM, IPPROTO_TCP); + sock = xs_create_sock(xprt, transport, family, SOCK_STREAM, IPPROTO_TCP); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -1924,7 +1908,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_tcp_setup_socket(transport, xs_create_sock4); + xs_tcp_setup_socket(transport, PF_INET); } /** @@ -1938,7 +1922,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); - xs_tcp_setup_socket(transport, xs_create_sock6); + xs_tcp_setup_socket(transport, PF_INET6); } /** From cdd518d524b49e6e80b109bf985376456a2985ce Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:57:40 +0400 Subject: [PATCH 75/99] sunrpc: Remove TCP worker wrappers The v4 and the v6 wrappers only pass the respective family to the xs_tcp_setup_socket. This family can be taken from the xprt's sockaddr. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 39 +++++++-------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3dbc5d631d66..31ee5748dfaf 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1826,8 +1826,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * * Invoked by a work queue tasklet. */ -static void xs_tcp_setup_socket(struct sock_xprt *transport, int family) +static void xs_tcp_setup_socket(struct work_struct *work) { + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; @@ -1837,7 +1839,8 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, int family) if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - sock = xs_create_sock(xprt, transport, family, SOCK_STREAM, IPPROTO_TCP); + sock = xs_create_sock(xprt, transport, + xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -1897,34 +1900,6 @@ static void xs_tcp_setup_socket(struct sock_xprt *transport, int family) xprt_wake_pending_tasks(xprt, status); } -/** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_tcp_connect_worker4(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - - xs_tcp_setup_socket(transport, PF_INET); -} - -/** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. 
- */ -static void xs_tcp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - - xs_tcp_setup_socket(transport, PF_INET6); -} - /** * xs_connect - connect a socket to a remote endpoint * @task: address of RPC task that manages state of connect request @@ -2328,7 +2303,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker4); + xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: @@ -2336,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker6); + xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: From 8c14ff2aaf26d58aa2258a59bd419c906d105938 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 4 Oct 2010 16:58:02 +0400 Subject: [PATCH 76/99] sunrpc: Remove UDP worker wrappers Same for UDP sockets creation paths. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 41 +++++++---------------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 31ee5748dfaf..bc5786146eee 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1667,8 +1667,10 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_udp_do_set_buffer_size(xprt); } -static void xs_udp_setup_socket(struct sock_xprt *transport, int family) +static void xs_udp_setup_socket(struct work_struct *work) { + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; int status = -EIO; @@ -1678,7 +1680,8 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, int family) /* Start by resetting any existing state */ xs_reset_transport(transport); - sock = xs_create_sock(xprt, transport, family, SOCK_DGRAM, IPPROTO_UDP); + sock = xs_create_sock(xprt, transport, + xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); if (IS_ERR(sock)) goto out; @@ -1695,36 +1698,6 @@ static void xs_udp_setup_socket(struct sock_xprt *transport, int family) xprt_wake_pending_tasks(xprt, status); } -/** - * xs_udp_connect_worker4 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ - -static void xs_udp_connect_worker4(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - - xs_udp_setup_socket(transport, PF_INET); -} - -/** - * xs_udp_connect_worker6 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. 
- */ - -static void xs_udp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - - xs_udp_setup_socket(transport, PF_INET6); -} - /* * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. @@ -2229,7 +2202,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker4); + xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: @@ -2237,7 +2210,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker6); + xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: From f10fef38d2d1605c977346457d0adb0919d0bbe7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 5 Oct 2010 20:47:16 +0400 Subject: [PATCH 77/99] sunrpc: Remove useless if (task == NULL) from xprt_reserve_xprt The task in question is dereferenced above (and is actually never NULL). Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 953206d8c6c2..64a4a94fd6dd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; - if (task == NULL) - return 0; goto out_sleep; } xprt->snd_task = task; From c636b572e00e38855dc7a56a0fa438dd835f39de Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 6 Oct 2010 13:45:56 +0400 Subject: [PATCH 78/99] sunrpc: Don't return NULL from rpcb_create > The reason for this is in the future, we may want to support additional > address family types. We should, therefore, ensure that every piece of > code that is sensitive to address families fail in some orderly manner > to let developers know where a change is needed. Makes sense. I was under impression, that AF-s other than INET are not cared about at all :( Here's a fixed version of the patch. Log: Its callers check for ERR_PTR. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/rpcb_clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 83af38df3267..1ef2d417af4e 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -249,7 +249,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); break; default: - return NULL; + return ERR_PTR(-EAFNOSUPPORT); } return rpc_create(&args); From 50fa0d40a9d601bb8e6c9a595e90940bc846f7df Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 5 Oct 2010 20:49:35 +0400 Subject: [PATCH 79/99] sunrpc: Remove dead "else" branch from bc xprt creation Since the xprt in question is forcibly set to be bound the else branch of this check is unneeded. 
Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bc5786146eee..79155650b7b9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2366,15 +2366,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) goto out_err; } - if (xprt_bound(xprt)) - dprintk("RPC: set up xprt to %s (port %s) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - else - dprintk("RPC: set up xprt to %s (autobind) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PROTO]); + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); /* * Since we don't want connections for the backchannel, we set From 8f3a6de313391b6910aa7db185eb9f3e930a51cf Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 5 Oct 2010 23:30:19 +0400 Subject: [PATCH 80/99] sunrpc: Turn list_for_each-s into the ..._entry-s Saves some lines of code and some brain ticks when reading the code. Signed-off-by: Pavel Emelyanov Reviewed-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svc_xprt.c | 6 ++---- net/sunrpc/svcsock.c | 12 +++--------- net/sunrpc/xprt.c | 6 ++---- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 12025eedc781..bef1e8844a2c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); */ int svc_print_xprts(char *buf, int maxlen) { - struct list_head *le; + struct svc_xprt_class *xcl; char tmpstr[80]; int len = 0; buf[0] = '\0'; spin_lock(&svc_xprt_class_lock); - list_for_each(le, &svc_xprt_class_list) { + list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { int slen; - struct svc_xprt_class *xcl = - list_entry(le, struct svc_xprt_class, xcl_list); sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); slen = strlen(tmpstr); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 88de3d093165..1454739b4233 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1261,19 +1261,13 @@ void svc_sock_update_bufs(struct svc_serv *serv) * The number of server threads has changed. 
Update * rcvbuf and sndbuf accordingly on all sockets */ - struct list_head *le; + struct svc_sock *svsk; spin_lock_bh(&serv->sv_lock); - list_for_each(le, &serv->sv_permsocks) { - struct svc_sock *svsk = - list_entry(le, struct svc_sock, sk_xprt.xpt_list); + list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - } - list_for_each(le, &serv->sv_tempsocks) { - struct svc_sock *svsk = - list_entry(le, struct svc_sock, sk_xprt.xpt_list); + list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list) set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - } spin_unlock_bh(&serv->sv_lock); } EXPORT_SYMBOL_GPL(svc_sock_update_bufs); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 64a4a94fd6dd..4c8f18aff7c3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -755,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task) */ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { - struct list_head *pos; + struct rpc_rqst *entry; - list_for_each(pos, &xprt->recv) { - struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); + list_for_each_entry(entry, &xprt->recv, rq_list) if (entry->rq_xid == xid) return entry; - } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); From a25e758c5fa1137e1bbc440194e55f7c59177145 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Oct 2010 17:49:27 +0200 Subject: [PATCH 81/99] sunrpc/xprtrdma: clean up workqueue usage * Create and use svc_rdma_wq instead of using the system workqueue and flush_scheduled_work(). This workqueue is necessary to serve as flushing domain for rdma->sc_work which is used to destroy itself and thus can't be flushed explicitly. * Replace cancel_delayed_work() + flush_scheduled_work() with cancel_delayed_work_sync(). * Implement synchronous connect in xprt_rdma_connect() using flush_delayed_work() on the rdma_connect work instead of using flush_scheduled_work(). 
This is to prepare for the deprecation and removal of flush_scheduled_work(). Signed-off-by: Tejun Heo Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma.c | 11 ++++++++++- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 +++++- net/sunrpc/xprtrdma/transport.c | 5 ++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index d718b8fa9525..09af4fab1a45 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod; struct kmem_cache *svc_rdma_map_cachep; struct kmem_cache *svc_rdma_ctxt_cachep; +struct workqueue_struct *svc_rdma_wq; + /* * This function implements reading and resetting an atomic_t stat * variable through read/write to a proc file. Any write to the file @@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = { void svc_rdma_cleanup(void) { dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); - flush_scheduled_work(); + destroy_workqueue(svc_rdma_wq); if (svcrdma_table_header) { unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; @@ -249,6 +252,11 @@ int svc_rdma_init(void) dprintk("\tsq_depth : %d\n", svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); + + svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); + if (!svc_rdma_wq) + return -ENOMEM; + if (!svcrdma_table_header) svcrdma_table_header = register_sysctl_table(svcrdma_root_table); @@ -283,6 +291,7 @@ int svc_rdma_init(void) kmem_cache_destroy(svc_rdma_map_cachep); err0: unregister_sysctl_table(svcrdma_table_header); + destroy_workqueue(svc_rdma_wq); return -ENOMEM; } MODULE_AUTHOR("Tom Tucker "); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 22f65cc46fe5..9df1eadc912a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c 
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -90,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = { /* WR context cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_ctxt_cachep; +/* Workqueue created in svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; @@ -1187,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt) struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); INIT_WORK(&rdma->sc_work, __svc_rdma_free); - schedule_work(&rdma->sc_work); + queue_work(svc_rdma_wq, &rdma->sc_work); } static int svc_rdma_has_wspace(struct svc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2da32b40bfcf..0867070bb5ca 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) dprintk("RPC: %s: called\n", __func__); - cancel_delayed_work(&r_xprt->rdma_connect); - flush_scheduled_work(); + cancel_delayed_work_sync(&r_xprt->rdma_connect); xprt_clear_connected(xprt); @@ -448,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task) } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + flush_delayed_work(&r_xprt->rdma_connect); } } From 4232e8634ad82c5a53389e4016de15a8b15c09c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Oct 2010 11:52:51 -0400 Subject: [PATCH 82/99] SUNRPC: Use conventional switch statement when reclassifying sockets Clean up. Defensive coding: If "family" is ever something that is neither AF_INET nor AF_INET6, xs_reclassify_socket6() is not the appropriate default action. Choose to do nothing in that case. Introduced by commit 6bc9638a. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtsock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 79155650b7b9..b58eef76a518 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1558,7 +1558,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) nloop++; } while (err == -EADDRINUSE && nloop != 2); - if (myaddr.ss_family == PF_INET) + if (myaddr.ss_family == AF_INET) dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__, &((struct sockaddr_in *)&myaddr)->sin_addr, port, err ? "failed" : "ok", err); @@ -1594,10 +1594,14 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { - if (family == PF_INET) + switch (family) { + case AF_INET: xs_reclassify_socket4(sock); - else + break; + case AF_INET6: xs_reclassify_socket6(sock); + break; + } } #else static inline void xs_reclassify_socket4(struct socket *sock) From 9247685088398cf21bcb513bd2832b4cd42516c4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Oct 2010 11:53:01 -0400 Subject: [PATCH 83/99] SUNRPC: Properly initialize sock_xprt.srcaddr in all cases The source address field in the transport's sock_xprt is initialized ONLY IF the RPC application passed a pointer to a source address during the call to rpc_create(). However, xs_bind() subsequently uses the value of this field without regard to whether the source address was initialized during transport creation or not. So far we've been lucky: the uninitialized value of this field is zeroes. xs_bind(), until recently, used only the sin[6]_addr field in this sockaddr, and all zeroes is a valid value for this: it means ANYADDR. This is a happy coincidence. However, xs_bind() now wants to use the sa_family field as well, and expects it to be initialized to something other than zero. 
Therefore, the source address sockaddr field should be fully initialized at transport create time in _every_ case, not just when the RPC application wants to use a specific bind address. Bruce added a workaround for this missing initialization by adjusting commit 6bc9638a, but the "right" way to do this is to ensure that the source address sockaddr is always correctly initialized from the get-go. This patch doesn't introduce a behavior change. It's simply a clean-up of Bruce's fix, to prevent future problems of this kind. It may look like overkill, but a) it clearly documents the default initial value of this field, b) it doesn't assume that the sockaddr_storage memory is first initialized to any particular value, and c) it will fail verbosely if some unknown address family is passed in Originally introduced by commit d3bc9a1d. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b58eef76a518..27fc4b4cb82b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1629,7 +1629,6 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, protocol, -err); goto out; } - transport->srcaddr.ss_family = family; xs_reclassify_socket(family, sock); if (xs_bind(transport, sock)) { @@ -2136,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = { .print_stats = xs_tcp_print_stats, }; +static int xs_init_anyaddr(const int family, struct sockaddr *sap) +{ + static const struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + static const struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + }; + + switch (family) { + case AF_INET: + memcpy(sap, &sin, sizeof(sin)); + break; + case AF_INET6: + memcpy(sap, &sin6, sizeof(sin6)); + break; + default: + dprintk("RPC: %s: Bad address family\n", __func__); + return 
-EAFNOSUPPORT; + } + return 0; +} + static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, unsigned int slot_table_size) { @@ -2159,6 +2183,13 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, xprt->addrlen = args->addrlen; if (args->srcaddr) memcpy(&new->srcaddr, args->srcaddr, args->addrlen); + else { + int err; + err = xs_init_anyaddr(args->dstaddr->sa_family, + (struct sockaddr *)&new->srcaddr); + if (err != 0) + return ERR_PTR(err); + } return xprt; } From cd5b814458e5554457c6e62f17aed122145b065e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 2 Oct 2010 17:03:35 -0400 Subject: [PATCH 84/99] nfsd4: don't cache seq_misordered replies Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 02c23b7c5cd5..7f1282859cd6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1510,7 +1510,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out_cache; + goto out; } cs_slot->sl_seqid++; /* from 0 to 1 */ @@ -1549,7 +1549,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, NFS4_MAX_SESSIONID_LEN); cr_ses->seqid = cs_slot->sl_seqid; -out_cache: /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); out: From edd76786633a3145661c7a90c9baccae8e3c9e84 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Jun 2010 22:26:31 -0400 Subject: [PATCH 85/99] nfsd4: move callback setup into session init code The backchannel should be associated with a session, it isn't really global to the client. We do, however, want a pointer global to the client which tracks which session we're currently using for client-based callbacks. This is a first step in that direction; for now, just reshuffling of code with no significant change in behavior. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 29 ++++++++++++++--------------- fs/nfsd/state.h | 1 + 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7f1282859cd6..db5d8c8537ed 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -771,6 +771,19 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp free_session(&new->se_ref); return nfserr_jukebox; } + if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { + struct sockaddr *sa = svc_addr(rqstp); + + clp->cl_cb_session = new; + clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt; + svc_xprt_get(rqstp->rq_xprt); + rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); + clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + clp->cl_cb_conn.cb_minorversion = 1; + clp->cl_cb_conn.cb_prog = cses->callback_prog; + clp->cl_cb_seq_nr = 1; + nfsd4_probe_callback(clp, &clp->cl_cb_conn); + } return nfs_ok; } @@ -1045,7 +1058,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, clp->cl_flavor = rqstp->rq_flavor; copy_cred(&clp->cl_cred, &rqstp->rq_cred); gen_confirm(clp); - + clp->cl_cb_session = NULL; return clp; } @@ -1515,20 +1528,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); - - if (cr_ses->flags & SESSION4_BACK_CHAN) { - unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt; - svc_xprt_get(rqstp->rq_xprt); - rpc_copy_addr( - (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, - sa); - unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); - unconf->cl_cb_conn.cb_minorversion = - cstate->minorversion; - unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; - unconf->cl_cb_seq_nr = 1; - nfsd4_probe_callback(unconf, &unconf->cl_cb_conn); - } conf = unconf; } else { status = nfserr_stale_clientid; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 8d5e2370cce0..6e63c1d272bf 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -234,6 +234,7 @@ struct 
nfs4_client { u32 cl_cb_ident; atomic_t cl_cb_set; struct nfsd4_callback cl_cb_null; + struct nfsd4_session *cl_cb_session; /* for all client information that callback code might need: */ spinlock_t cl_lock; From 90c8145bb6fe1d9e0a808de6a701748967588bbd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Jun 2010 17:49:37 -0400 Subject: [PATCH 86/99] nfsd4: use client pointer to backchannel session Instead of copying the sessionid, use the new cl_cb_session pointer, which indicates which session we're using for the backchannel. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 9 +++++---- fs/nfsd/nfs4state.c | 4 +--- fs/nfsd/state.h | 1 - 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a269dbeff150..78ac779c09ff 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -251,6 +251,7 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; + struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; if (hdr->minorversion == 0) return; @@ -258,7 +259,7 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); WRITE32(OP_CB_SEQUENCE); - WRITEMEM(cb->cb_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(cb->cb_clp->cl_cb_seq_nr); WRITE32(0); /* slotid, always 0 */ WRITE32(0); /* highest slotid always 0 */ @@ -341,6 +342,7 @@ static int decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct rpc_rqst *rqstp) { + struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; struct nfs4_sessionid id; int status; u32 dummy; @@ -362,8 +364,7 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, 
cb->cb_clp->cl_sessionid.data, - NFS4_MAX_SESSIONID_LEN)) { + if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out; } @@ -587,7 +588,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, struct rpc_task *task) { - u32 *ptr = (u32 *)clp->cl_sessionid.data; + u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data; int status = 0; dprintk("%s: %u:%u:%u:%u\n", __func__, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index db5d8c8537ed..c942511f73e6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -753,8 +753,6 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp new->se_client = clp; gen_sessionid(new); - memcpy(clp->cl_sessionid.data, new->se_sessionid.data, - NFS4_MAX_SESSIONID_LEN); INIT_LIST_HEAD(&new->se_conns); @@ -1544,7 +1542,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (status) goto out; - memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, + memcpy(cr_ses->sessionid.data, conf->cl_cb_session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); cr_ses->seqid = cs_slot->sl_seqid; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6e63c1d272bf..cdce26ad50b5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -243,7 +243,6 @@ struct nfs4_client { struct list_head cl_sessions; struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; - struct nfs4_sessionid cl_sessionid; /* number of rpc's in progress over an associated session: */ atomic_t cl_refcount; From ac7c46f29a44f6d7f6d2e36dc874c0b7056acad2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Jun 2010 19:01:57 -0400 Subject: [PATCH 87/99] nfsd4: make backchannel sequence number per-session Currently we don't deal well with a client that has multiple sessions associated with it (even simultaneously, or serially over the lifetime of the client). 
In particular, we don't attempt to keep the backchannel running after the original session disappears. We will fix that soon. Once we do that, we need the slot sequence number to be per-session; otherwise, for example, we cannot correctly handle a case like this: - All session 1 connections are lost. - The client creates session 2. We use it for the backchannel (since it's the only working choice). - The client gives us a new connection to use with session 1. - The client destroys session 2. At this point our only choice is to go back to using session 1. When we do so we must use the sequence number that is next for session 1. We therefore need to maintain multiple sequence number streams. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 8 ++++---- fs/nfsd/nfs4state.c | 22 ++++++++++++---------- fs/nfsd/state.h | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 78ac779c09ff..5df9dda47bf4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -260,7 +260,7 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, WRITE32(OP_CB_SEQUENCE); WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(cb->cb_clp->cl_cb_seq_nr); + WRITE32(ses->se_cb_seq_nr); WRITE32(0); /* slotid, always 0 */ WRITE32(0); /* highest slotid always 0 */ WRITE32(0); /* cachethis always 0 */ @@ -369,7 +369,7 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, goto out; } READ32(dummy); - if (dummy != cb->cb_clp->cl_cb_seq_nr) { + if (dummy != ses->se_cb_seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out; } @@ -643,11 +643,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) if (clp->cl_cb_conn.cb_minorversion) { /* No need for lock, access serialized in nfsd4_cb_prepare */ - ++clp->cl_cb_seq_nr; + ++clp->cl_cb_session->se_cb_seq_nr; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__, - clp->cl_cb_seq_nr); + clp->cl_cb_session->se_cb_seq_nr); /* We're done looking into the sequence information */ task->tk_msg.rpc_resp = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c942511f73e6..6367c445d015 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -725,8 +725,7 @@ void free_session(struct kref *kref) kfree(ses); } - -static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new; struct nfsd4_channel_attrs *fchan = &cses->fore_channel; @@ -747,7 +746,7 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp new = alloc_session(slotsize, numslots); if (!new) { nfsd4_put_drc_mem(slotsize, fchan->maxreqs); - return nfserr_jukebox; + return NULL; } init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); @@ -756,6 +755,7 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp INIT_LIST_HEAD(&new->se_conns); + new->se_cb_seq_nr = 1; new->se_flags = cses->flags; kref_init(&new->se_ref); idx = hash_sessionid(&new->se_sessionid); @@ -765,9 +765,10 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp spin_unlock(&client_lock); status = nfsd4_new_conn(rqstp, new); + /* whoops: benny points out, status is ignored! 
(err, or bogus) */ if (status) { free_session(&new->se_ref); - return nfserr_jukebox; + return NULL; } if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { struct sockaddr *sa = svc_addr(rqstp); @@ -779,10 +780,9 @@ static __be32 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); clp->cl_cb_conn.cb_minorversion = 1; clp->cl_cb_conn.cb_prog = cses->callback_prog; - clp->cl_cb_seq_nr = 1; nfsd4_probe_callback(clp, &clp->cl_cb_conn); } - return nfs_ok; + return new; } /* caller must hold client_lock */ @@ -1485,6 +1485,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, { struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfsd4_session *new; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1538,11 +1539,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; - status = alloc_init_session(rqstp, conf, cr_ses); - if (status) + status = nfserr_jukebox; + new = alloc_init_session(rqstp, conf, cr_ses); + if (!new) goto out; - - memcpy(cr_ses->sessionid.data, conf->cl_cb_session->se_sessionid.data, + status = nfs_ok; + memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); cr_ses->seqid = cs_slot->sl_seqid; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cdce26ad50b5..7f5b2671ef18 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -172,6 +172,7 @@ struct nfsd4_session { struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; struct list_head se_conns; + u32 se_cb_seq_nr; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; @@ -249,7 +250,6 @@ struct nfs4_client { /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; - u32 cl_cb_seq_nr; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ }; From 
86c3e16cc7aace4d1143952813b6cc2a80c51295 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 2 Oct 2010 17:04:00 -0400 Subject: [PATCH 88/99] nfsd4: confirm only on successful create_session Following rfc 5661, section 18.36.4: "If the session is not successfully created, then no changes are made to any client records on the server." We shouldn't be confirming or incrementing the sequence id in this case. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6367c445d015..7e817d13cd82 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1487,6 +1487,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfs4_client *conf, *unconf; struct nfsd4_session *new; struct nfsd4_clid_slot *cs_slot = NULL; + bool confirm_me = false; int status = 0; nfs4_lock_state(); @@ -1509,7 +1510,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid, cr_ses->seqid); goto out; } - cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { @@ -1525,8 +1525,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - cs_slot->sl_seqid++; /* from 0 to 1 */ - move_to_confirmed(unconf); + confirm_me = true; conf = unconf; } else { status = nfserr_stale_clientid; @@ -1546,10 +1545,13 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfs_ok; memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); + cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); + if (confirm_me) + move_to_confirmed(conf); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); From d29c374cd20de620898d2936396048518809ae24 Mon Sep 17 00:00:00 2001 From: "J.
Bruce Fields" Date: Tue, 15 Jun 2010 17:34:11 -0400 Subject: [PATCH 89/99] nfsd4: track backchannel connections We need to keep track of which connections are available for use with the backchannel, which for the forechannel, and which for both. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7e817d13cd82..c470cb78c6c1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -644,7 +644,7 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) spin_unlock(&clp->cl_lock); } -static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp) +static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) { struct nfsd4_conn *conn; @@ -653,7 +653,7 @@ static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp) return NULL; svc_xprt_get(rqstp->rq_xprt); conn->cn_xprt = rqstp->rq_xprt; - conn->cn_flags = NFS4_CDFC4_FORE; + conn->cn_flags = flags; INIT_LIST_HEAD(&conn->cn_xpt_user.list); return conn; } @@ -682,8 +682,11 @@ static void nfsd4_register_conn(struct nfsd4_conn *conn) static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) { struct nfsd4_conn *conn; + u32 flags = NFS4_CDFC4_FORE; - conn = alloc_conn(rqstp); + if (ses->se_flags & SESSION4_BACK_CHAN) + flags |= NFS4_CDFC4_BACK; + conn = alloc_conn(rqstp, flags); if (!conn) return nfserr_jukebox; nfsd4_hash_conn(conn, ses); @@ -1640,7 +1643,7 @@ static void nfsd4_sequence_check_conn(struct svc_rqst *rqstp, struct nfsd4_sessi if (c) return; - new = alloc_conn(rqstp); + new = alloc_conn(rqstp, NFS4_CDFC4_FORE); spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(rqstp, ses); From 8b5ce5cd44743af84507721fa2cb4125ae67955c Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Tue, 19 Oct 2010 17:31:50 -0400 Subject: [PATCH 90/99] nfsd4: callback program number is per-session The callback program is allowed to depend on the session which the callback is going over. No change in behavior yet, while we still only do callbacks over a single session for the lifetime of the client. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5df9dda47bf4..140bb3656a24 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -498,6 +498,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) return -EINVAL; if (conn->cb_minorversion) { args.bc_xprt = conn->cb_xprt; + args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; } /* Create RPC client */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c470cb78c6c1..59bc0011516b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -760,6 +760,7 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n new->se_cb_seq_nr = 1; new->se_flags = cses->flags; + new->se_cb_prog = cses->callback_prog; kref_init(&new->se_ref); idx = hash_sessionid(&new->se_sessionid); spin_lock(&client_lock); @@ -782,7 +783,6 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); clp->cl_cb_conn.cb_minorversion = 1; - clp->cl_cb_conn.cb_prog = cses->callback_prog; nfsd4_probe_callback(clp, &clp->cl_cb_conn); } return new; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 7f5b2671ef18..b3bed366aba4 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -96,7 +96,8 @@ struct nfs4_cb_conn { /* SETCLIENTID info */ struct sockaddr_storage cb_addr; size_t cb_addrlen; - u32 cb_prog; + u32 
cb_prog; /* used only in 4.0 case; + per-session otherwise */ u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ struct svc_xprt *cb_xprt; /* minorversion 1 only */ @@ -172,6 +173,7 @@ struct nfsd4_session { struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; struct list_head se_conns; + u32 se_cb_prog; u32 se_cb_seq_nr; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; From 5a3c9d71343cf27b7afef24ed312368d48dada09 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 19 Oct 2010 17:56:52 -0400 Subject: [PATCH 91/99] nfsd4: separate callback change and callback probe Only one of the nfsd4_callback_probe callers actually cares about changing the callback information. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 15 ++++++++++----- fs/nfsd/nfs4state.c | 7 ++++--- fs/nfsd/state.h | 3 ++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 140bb3656a24..d38ee3c55a08 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -550,7 +550,7 @@ int set_callback_cred(void) static struct workqueue_struct *callback_wq; -void do_probe_callback(struct nfs4_client *clp) +static void do_probe_callback(struct nfs4_client *clp) { struct nfsd4_callback *cb = &clp->cl_cb_null; @@ -568,17 +568,22 @@ void do_probe_callback(struct nfs4_client *clp) } /* - * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... + * Poke the callback thread to process any updates to the callback + * parameters, and send a null probe. 
*/ -void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) +void nfsd4_probe_callback(struct nfs4_client *clp) +{ + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + do_probe_callback(clp); +} + +void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { BUG_ON(atomic_read(&clp->cl_cb_set)); spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); - set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); spin_unlock(&clp->cl_lock); - do_probe_callback(clp); } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59bc0011516b..2327a8c00862 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -783,7 +783,7 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); clp->cl_cb_conn.cb_minorversion = 1; - nfsd4_probe_callback(clp, &clp->cl_cb_conn); + nfsd4_probe_callback(clp); } return new; } @@ -1912,7 +1912,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; else { atomic_set(&conf->cl_cb_set, 0); - nfsd4_probe_callback(conf, &unconf->cl_cb_conn); + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); expire_client(unconf); status = nfs_ok; @@ -1946,7 +1947,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } move_to_confirmed(unconf); conf = unconf; - nfsd4_probe_callback(conf, &conf->cl_cb_conn); + nfsd4_probe_callback(conf); status = nfs_ok; } } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index b3bed366aba4..bbc4d587b341 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -453,7 +453,8 @@ extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); -extern void nfsd4_probe_callback(struct 
nfs4_client *clp, struct nfs4_cb_conn *); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); From 792c95dd519c54d6b0fd6401b3da7ea67b0d6b72 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 12 Oct 2010 19:55:25 -0400 Subject: [PATCH 92/99] nfsd4: delay session removal till free_client Have unhash_client_locked() remove client and associated sessions from global hashes, but delay further dismantling till free_client(). (After unhash_client_locked(), the only remaining references outside the destroying thread are from any connections which have xpt_user callbacks registered.) This will simplify locking on session destruction. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2327a8c00862..0f2643dac22a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -883,6 +883,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { + while (!list_empty(&clp->cl_sessions)) { + struct nfsd4_session *ses; + ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, + se_perclnt); + list_del(&ses->se_perclnt); + nfsd4_put_session(ses); + } if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -909,15 +916,12 @@ release_session_client(struct nfsd4_session *session) static inline void unhash_client_locked(struct nfs4_client *clp) { + struct nfsd4_session *ses; + mark_client_expired(clp); list_del(&clp->cl_lru); - while (!list_empty(&clp->cl_sessions)) { - struct nfsd4_session *ses; - ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, - 
se_perclnt); - unhash_session(ses); - nfsd4_put_session(ses); - } + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + list_del_init(&ses->se_hash); } static void @@ -1031,6 +1035,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, if (clp == NULL) return NULL; + INIT_LIST_HEAD(&clp->cl_sessions); + princ = svc_gss_principal(rqstp); if (princ) { clp->cl_principal = kstrdup(princ, GFP_KERNEL); @@ -1047,7 +1053,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); spin_lock_init(&clp->cl_lock); INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); From 8323c3b2a6b6543919d5ebdddc7d52f192126161 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 19 Oct 2010 19:36:51 -0400 Subject: [PATCH 93/99] nfsd4: move minorversion to client The minorversion seems more a property of the client than the callback channel. Some time we should probably also enforce consistent minorversion usage from the client; for now, this is just a cosmetic change. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 8 ++++---- fs/nfsd/nfs4state.c | 12 ++++++++++-- fs/nfsd/state.h | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d38ee3c55a08..67bcd2c72623 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -496,7 +496,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - if (conn->cb_minorversion) { + if (clp->cl_minorversion) { args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; @@ -620,7 +620,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; - u32 minorversion = clp->cl_cb_conn.cb_minorversion; + u32 minorversion = clp->cl_minorversion; int status = 0; cb->cb_minorversion = minorversion; @@ -645,9 +645,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfs4_client *clp = dp->dl_client; dprintk("%s: minorversion=%d\n", __func__, - clp->cl_cb_conn.cb_minorversion); + clp->cl_minorversion); - if (clp->cl_cb_conn.cb_minorversion) { + if (clp->cl_minorversion) { /* No need for lock, access serialized in nfsd4_cb_prepare */ ++clp->cl_cb_session->se_cb_seq_nr; clear_bit(0, &clp->cl_cb_slot_busy); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f2643dac22a..ce0412fd23eb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -782,7 +782,6 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n svc_xprt_get(rqstp->rq_xprt); rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); - clp->cl_cb_conn.cb_minorversion = 1; nfsd4_probe_callback(clp); } return new; @@ -1200,7 
+1199,6 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) if (conn->cb_addr.ss_family == AF_INET6) ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; - conn->cb_minorversion = 0; conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; return; @@ -1540,6 +1538,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } + /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + conf->cl_minorversion = 1; /* * We do not support RDMA or persistent sessions */ @@ -1857,6 +1860,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; gen_clid(new); } + /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + new->cl_minorversion = 0; gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index bbc4d587b341..39adc27b0685 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -98,7 +98,6 @@ struct nfs4_cb_conn { size_t cb_addrlen; u32 cb_prog; /* used only in 4.0 case; per-session otherwise */ - u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ struct svc_xprt *cb_xprt; /* minorversion 1 only */ }; @@ -227,6 +226,7 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_firststate; /* recovery dir creation */ + u32 cl_minorversion; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; From 5d18c1c2a9a74e0f966c257520b8b7f5136c87b3 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Tue, 19 Oct 2010 23:00:12 -0400 Subject: [PATCH 94/99] nfsd4: only require krb5 principal for NFSv4.0 callbacks In the sessions backchannel case, we don't need a krb5 principal name for the client; we use the already-created forechannel credentials instead. Some cleanup, while we're there: make it clearer which code here is 4.0- or sessions- specific. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 67bcd2c72623..143da2eecd7b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -481,22 +481,24 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) }; struct rpc_create_args args = { .net = &init_net, - .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *) &conn->cb_addr, .addrsize = conn->cb_addrlen, .timeout = &timeparms, .program = &cb_program, - .prognumber = conn->cb_prog, .version = 0, .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), - .client_name = clp->cl_principal, }; struct rpc_clnt *client; - if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) - return -EINVAL; - if (clp->cl_minorversion) { + if (clp->cl_minorversion == 0) { + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + return -EINVAL; + args.client_name = clp->cl_principal; + args.prognumber = conn->cb_prog, + args.protocol = XPRT_TRANSPORT_TCP; + clp->cl_cb_ident = conn->cb_ident; + } else { args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; @@ -508,7 +510,6 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) PTR_ERR(client)); return PTR_ERR(client); } - clp->cl_cb_ident = conn->cb_ident; clp->cl_cb_client = client; return 0; From a663bdd8c5d18d287f7468470816c9e0e66343c1 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Thu, 21 Oct 2010 17:17:31 -0400 Subject: [PATCH 95/99] nfsd4: fix connection allocation in sequence() We're doing an allocation under a spinlock, and ignoring the possibility of allocation failure. A better fix wouldn't require an unnecessary allocation in the common case, but we'll leave that for later. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ce0412fd23eb..d4aa1b59d84b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1628,33 +1628,25 @@ nfsd4_destroy_session(struct svc_rqst *r, return status; } -static struct nfsd4_conn *__nfsd4_find_conn(struct svc_rqst *r, struct nfsd4_session *s) +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) { struct nfsd4_conn *c; list_for_each_entry(c, &s->se_conns, cn_persession) { - if (c->cn_xprt == r->rq_xprt) { + if (c->cn_xprt == xpt) { return c; } } return NULL; } -static void nfsd4_sequence_check_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; - struct nfsd4_conn *c, *new = NULL; + struct nfsd4_conn *c; spin_lock(&clp->cl_lock); - c = __nfsd4_find_conn(rqstp, ses); - spin_unlock(&clp->cl_lock); - if (c) - return; - - new = alloc_conn(rqstp, NFS4_CDFC4_FORE); - - spin_lock(&clp->cl_lock); - c = __nfsd4_find_conn(rqstp, ses); + c = __nfsd4_find_conn(new->cn_xprt, ses); if (c) { spin_unlock(&clp->cl_lock); free_conn(new); @@ -1674,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_session *session; struct nfsd4_slot *slot; + struct nfsd4_conn *conn; int status; if (resp->opcnt != 1) return nfserr_sequence_pos; + /* + * Will be either used or freed by nfsd4_sequence_check_conn + * below. 
+ */ + conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); + if (!conn) + return nfserr_jukebox; + spin_lock(&client_lock); status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid); @@ -1710,7 +1711,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out; - nfsd4_sequence_check_conn(rqstp, session); + nfsd4_sequence_check_conn(conn, session); + conn = NULL; /* Success! bump slot seqid */ slot->sl_inuse = true; @@ -1726,6 +1728,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, nfsd4_get_session(cstate->session); atomic_inc(&session->se_client->cl_refcount); } + kfree(conn); spin_unlock(&client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); return status; From 7e4fdd0744fcb9f08854c37643bf529c5945cc36 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 23 Oct 2010 11:04:12 -0400 Subject: [PATCH 96/99] svcrpc: never clear XPT_BUSY on dead xprt Once an xprt has been deleted, there's no reason to allow it to be enqueued--at worst, that might cause the xprt to be re-added to some global list, resulting in later corruption. Also, note this leaves us with no need for the reference-count manipulation here. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bef1e8844a2c..1b0673c6e986 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -936,10 +936,7 @@ void svc_close_xprt(struct svc_xprt *xprt) /* someone else will have to effect the close */ return; - svc_xprt_get(xprt); svc_delete_xprt(xprt); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_close_xprt); From ac9303eb74471bc2567960b47497a8bfbe1e5a03 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Sat, 23 Oct 2010 11:16:10 -0400 Subject: [PATCH 97/99] svcrpc: assume svc_delete_xprt() called only once As long as DEAD xprts are left BUSY, and svc_delete_xprt is called only with BUSY held, then svc_delete_xprt() will never be called on an xprt that is already DEAD. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1b0673c6e986..c65a47218290 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -903,7 +903,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) /* Only do this once */ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) - return; + BUG(); dprintk("svc: svc_delete_xprt(%p)\n", xprt); xprt->xpt_ops->xpo_detach(xprt); From 01dba075d571f5a8b7dcb153fdfd14e981c4cee3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 23 Oct 2010 11:23:46 -0400 Subject: [PATCH 98/99] svcrpc: no need for XPT_DEAD check in svc_xprt_enqueue If any xprt marked DEAD is also left BUSY for the rest of its life, then the XPT_DEAD check here is superfluous--we'll get the same result from the XPT_BUSY check just after. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c65a47218290..c82fe739fbdc 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -330,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) "svc_xprt_enqueue: " "threads and transports both waiting??\n"); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { - /* Don't enqueue dead transports */ - dprintk("svc: transport %p is dead, not enqueued\n", xprt); - goto out_unlock; - } - pool->sp_stats.packets++; /* Mark transport as busy. It will remain in this state until From 42d7ba3d6d56a6cbc773284896108b1e2ebcee81 Mon Sep 17 00:00:00 2001 From: "J.
Bruce Fields" Date: Sat, 23 Oct 2010 11:55:53 -0400 Subject: [PATCH 99/99] svcrpc: svc_tcp_sendto XPT_DEAD check is redundant The only caller (svc_send) has already checked XPT_DEAD. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1454739b4233..07919e16be3e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1135,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) reclen = htonl(0x80000000|((xbufp->len ) - 4)); memcpy(xbufp->head[0].iov_base, &reclen, 4); - if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) - return -ENOTCONN; - sent = svc_sendto(rqstp, &rqstp->rq_res); if (sent != xbufp->len) { printk(KERN_NOTICE