Merge branch 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

* 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (44 commits)
  NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation()
  nfs: don't use d_move in nfs_async_rename_done
  RDMA: Increasing RPCRDMA_MAX_DATA_SEGS
  SUNRPC: Replace xprt->resend and xprt->sending with a priority queue
  SUNRPC: Allow caller of rpc_sleep_on() to select priority levels
  SUNRPC: Support dynamic slot allocation for TCP connections
  SUNRPC: Clean up the slot table allocation
  SUNRPC: Initalise the struct xprt upon allocation
  SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot
  pnfs: simplify pnfs files module autoloading
  nfs: document nfsv4 sillyrename issues
  NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL
  SUNRPC: Convert the backchannel exports to EXPORT_SYMBOL_GPL
  SUNRPC: sunrpc should not explicitly depend on NFS config options
  NFS: Clean up - simplify the switch to read/write-through-MDS
  NFS: Move the pnfs write code into pnfs.c
  NFS: Move the pnfs read code into pnfs.c
  NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed
  NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request
  NFS: Cache rpc_ops in struct nfs_pageio_descriptor
  ...
This commit is contained in:
Linus Torvalds 2011-07-27 13:23:02 -07:00
commit 28890d3598
44 changed files with 1856 additions and 632 deletions

View file

@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
/* We appear to be out of the grace period */
wake_up_all(&host->h_gracewait);
}
dprintk("lockd: server returns status %d\n", resp->status);
dprintk("lockd: server returns status %d\n",
ntohl(resp->status));
return 0; /* Okay, call complete */
}
@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
goto out;
if (resp->status != nlm_lck_denied_nolocks)
printk("lockd: unexpected unlock status: %d\n", resp->status);
printk("lockd: unexpected unlock status: %d\n",
ntohl(resp->status));
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
return -ENOLCK;
#endif
}
printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
printk(KERN_NOTICE "lockd: unexpected server status %d\n",
ntohl(status));
return -ENOLCK;
}

View file

@ -77,6 +77,7 @@ config NFS_V4
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_FS && NFS_V4 && EXPERIMENTAL
select SUNRPC_BACKCHANNEL
select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol

View file

@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);
spin_lock(&clp->cl_lock);
list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
if (nfs_compare_fh(&args->cbl_fh,
&NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
continue;
found = true;
/* Without this, layout can be freed as soon
* as we release cl_lock.
*/
get_layout_hdr(lo);
break;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
if (nfs_compare_fh(&args->cbl_fh,
&NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
continue;
found = true;
/* Without this, layout can be freed as soon
* as we release cl_lock.
*/
get_layout_hdr(lo);
break;
}
if (found)
break;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
};
spin_lock(&clp->cl_lock);
list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if ((args->cbl_recall_type == RETURN_FSID) &&
memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
&args->cbl_fsid, sizeof(struct nfs_fsid)))
memcmp(&server->fsid, &args->cbl_fsid,
sizeof(struct nfs_fsid)))
continue;
if (!igrab(lo->plh_inode))
continue;
get_layout_hdr(lo);
BUG_ON(!list_empty(&lo->plh_bulk_recall));
list_add(&lo->plh_bulk_recall, &recall_list);
list_for_each_entry(lo, &server->layouts, plh_layouts) {
if (!igrab(lo->plh_inode))
continue;
get_layout_hdr(lo);
BUG_ON(!list_empty(&lo->plh_bulk_recall));
list_add(&lo->plh_bulk_recall, &recall_list);
}
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;

View file

@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
#if defined(CONFIG_NFS_V4_1)
INIT_LIST_HEAD(&clp->cl_layouts);
#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
INIT_LIST_HEAD(&server->layouts);
atomic_set(&server->active, 0);
@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
dprintk("<-- %s %p\n", __func__, clp);
return clp;
}
EXPORT_SYMBOL(nfs4_set_ds_client);
EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.

View file

@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
return err;
}
static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/**
@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
nfs_mark_return_delegation(delegation);
nfs_mark_return_delegation(server, delegation);
}
}
@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
nfs_mark_return_delegation(delegation);
nfs_mark_return_delegation(server, delegation);
}
}
@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
int nfs_async_inode_return_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
rcu_read_lock();
@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_unlock();
return -ENOENT;
}
nfs_mark_return_delegation(delegation);
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);

View file

@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
#ifdef CONFIG_NFS_V4
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
#endif
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
struct nfs_pageio_descriptor;
/* read.c */
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
struct list_head *head);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct list_head *head);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,

View file

@ -119,7 +119,7 @@ char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
}
#ifdef CONFIG_NFS_V4
static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
{
struct gss_api_mech *mech;
struct xdr_netobj oid;

View file

@ -48,6 +48,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
NFS4CLNT_SERVER_SCOPE_MISMATCH,
};
enum nfs4_session_state {
@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
int cache_reply);
int (*validate_stateid)(struct nfs_delegation *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);

View file

@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
__func__, data->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
return PNFS_NOT_ATTEMPTED;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
dprintk("%s USE DS:ip %x %hu\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
struct nfs_fh *fh;
int status;
if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
return PNFS_NOT_ATTEMPTED;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
/* FIXME: remove this check when layout segment support is added */
if (lgr->range.offset != 0 ||
lgr->range.length != NFS4_MAX_UINT64) {
dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
__func__);
goto out;
}
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
/* Found deviceid is being reaped */
if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
goto out_put;
fl->dsaddr = dsaddr;
if (fl->first_stripe_index < 0 ||
@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
* return true : coalesce page
* return false : don't coalesce page
*/
bool
static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;
if (!pgio->pg_lseg)
return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}
/*
 * pg_init hook for reads: request a whole-file IOMODE_READ layout segment
 * for the descriptor's inode.  The descriptor must not already carry a
 * segment.  When no segment is returned, the descriptor is reset so the
 * read goes through the MDS.
 */
void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct pnfs_layout_segment *lseg;

	BUG_ON(pgio->pg_lseg != NULL);

	lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context,
				  0, NFS4_MAX_UINT64,
				  IOMODE_READ, GFP_KERNEL);
	pgio->pg_lseg = lseg;

	/* Without a layout segment the read has to be served by the MDS */
	if (!lseg)
		nfs_pageio_reset_read_mds(pgio);
}
/*
 * pg_init hook for writes: request a whole-file IOMODE_RW layout segment
 * for the descriptor's inode, allocating with GFP_NOFS.  The descriptor
 * must not already carry a segment.  When no segment is returned, the
 * descriptor is reset so the write goes through the MDS.
 */
void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			 struct nfs_page *req)
{
	struct pnfs_layout_segment *lseg;

	BUG_ON(pgio->pg_lseg != NULL);

	lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context,
				  0, NFS4_MAX_UINT64,
				  IOMODE_RW, GFP_NOFS);
	pgio->pg_lseg = lseg;

	/* Without a layout segment the write has to be sent to the MDS */
	if (!lseg)
		nfs_pageio_reset_write_mds(pgio);
}
/*
 * Page I/O operations the files layout driver registers for reads
 * (referenced from filelayout_type.pg_read_ops): layout lookup on init,
 * the stripe-aware coalescing test, and the generic pNFS read path.
 */
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
/*
 * Page I/O operations the files layout driver registers for writes
 * (referenced from filelayout_type.pg_write_ops): layout lookup on init,
 * the stripe-aware coalescing test, and the generic pNFS write path.
 */
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.pg_test = filelayout_pg_test,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
pnfs_unregister_layoutdriver(&filelayout_type);
}
MODULE_ALIAS("nfs-layouttype4-1");
module_init(nfs4filelayout_init);
module_exit(nfs4filelayout_exit);

View file

@ -47,10 +47,17 @@ enum stripetype4 {
};
/*
 * One network address (IPv4 or IPv6, including port) of a data server.
 * A data server with several multipath addresses owns one of these per
 * address.
 */
struct nfs4_pnfs_ds_addr {
struct sockaddr_storage da_addr; /* decoded address; port filled in by the decoder */
size_t da_addrlen; /* sizeof(sockaddr_in) or sizeof(sockaddr_in6) */
struct list_head da_node; /* entry on nfs4_pnfs_ds ds_addrs, or on a temporary decode list */
char *da_remotestr; /* human readable addr+port; may be NULL, used for dprintk only */
};
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
u32 ds_ip_addr;
u32 ds_port;
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
static inline struct nfs4_deviceid_node *
FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
{
return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
}
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);

View file

@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__);
return;
}
printk(" ip_addr %x port %hu\n"
printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
/* nfs4_ds_cache_lock is held */
/*
 * Decide whether two socket addresses name the same endpoint.
 *
 * The families must be equal.  AF_INET addresses match when address and
 * port are equal.  AF_INET6 addresses match when address and port are
 * equal and, if the first address is link-local, the scope ids agree too.
 * Any other family is reported through dprintk and never matches.
 */
static bool
same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
	if (addr1->sa_family != addr2->sa_family)
		return false;

	if (addr1->sa_family == AF_INET) {
		struct sockaddr_in *lhs = (struct sockaddr_in *)addr1;
		struct sockaddr_in *rhs = (struct sockaddr_in *)addr2;

		return lhs->sin_addr.s_addr == rhs->sin_addr.s_addr &&
		       lhs->sin_port == rhs->sin_port;
	}

	if (addr1->sa_family == AF_INET6) {
		struct sockaddr_in6 *lhs6 = (struct sockaddr_in6 *)addr1;
		struct sockaddr_in6 *rhs6 = (struct sockaddr_in6 *)addr2;

		/* LINKLOCAL addresses must have matching scope_id */
		if (ipv6_addr_scope(&lhs6->sin6_addr) ==
						IPV6_ADDR_SCOPE_LINKLOCAL &&
		    lhs6->sin6_scope_id != rhs6->sin6_scope_id)
			return false;

		return ipv6_addr_equal(&lhs6->sin6_addr, &rhs6->sin6_addr) &&
		       lhs6->sin6_port == rhs6->sin6_port;
	}

	dprintk("%s: unhandled address family: %u\n",
		__func__, addr1->sa_family);
	return false;
}
/*
* Lookup DS by addresses. The first matching address returns true.
* nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
_data_server_lookup_locked(u32 ip_addr, u32 port)
_data_server_lookup_locked(struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
struct nfs4_pnfs_ds_addr *da1, *da2;
dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
ntohl(ip_addr), ntohs(port));
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
if (ds->ds_ip_addr == ip_addr &&
ds->ds_port == port) {
return ds;
list_for_each_entry(da1, dsaddrs, da_node) {
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
list_for_each_entry(da2, &ds->ds_addrs, da_node) {
if (same_sockaddr(
(struct sockaddr *)&da1->da_addr,
(struct sockaddr *)&da2->da_addr))
return ds;
}
}
}
return NULL;
}
/*
 * Check that two address lists describe the same set of endpoints:
 * every entry of dsaddrs2 must have a same_sockaddr() match in dsaddrs1,
 * and both lists must hold the same number of entries.
 */
static bool
_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
				    struct list_head *dsaddrs2)
{
	struct nfs4_pnfs_ds_addr *have, *want;
	size_t nhave = 0;
	size_t nwant = 0;

	list_for_each_entry(want, dsaddrs2, da_node) {
		bool matched = false;

		list_for_each_entry(have, dsaddrs1, da_node) {
			matched = same_sockaddr(
					(struct sockaddr *)&have->da_addr,
					(struct sockaddr *)&want->da_addr);
			if (matched)
				break;
		}

		/* An address missing from the first list settles it */
		if (!matched)
			return false;

		nwant++;
	}

	list_for_each_entry(have, dsaddrs1, da_node)
		nhave++;

	return nhave == nwant;
}
/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only support IPv4
* Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
struct nfs_client *clp;
struct sockaddr_in sin;
struct nfs_client *clp = ERR_PTR(-EIO);
struct nfs4_pnfs_ds_addr *da;
int status = 0;
dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ds->ds_ip_addr;
sin.sin_port = ds->ds_port;
BUG_ON(list_empty(&ds->ds_addrs));
list_for_each_entry(da, &ds->ds_addrs, da_node) {
dprintk("%s: DS %s: trying address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
clp = nfs4_set_ds_client(mds_srv->nfs_client,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP);
if (!IS_ERR(clp))
break;
}
clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
}
ds->ds_clp = clp;
dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
dprintk("%s [existing] server=%s\n", __func__,
ds->ds_remotestr);
goto out;
}
@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
ds->ds_clp = clp;
dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
ntohs(ds->ds_port));
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
@ -147,12 +231,25 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
struct nfs4_pnfs_ds_addr *da;
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);
if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
while (!list_empty(&ds->ds_addrs)) {
da = list_first_entry(&ds->ds_addrs,
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
kfree(ds->ds_remotestr);
kfree(ds);
}
@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
/*
 * Create a string with a human readable address and port to avoid
 * complicated setup around many dprintks.
 *
 * The result has the form "{addr1,addr2,}": every address string on
 * @dsaddrs is followed by a comma and the whole list is wrapped in braces.
 *
 * @dsaddrs:   list of struct nfs4_pnfs_ds_addr linked through da_node
 * @gfp_flags: allocation flags for the returned buffer
 *
 * Returns a kzalloc'ed string that the caller must kfree(), or NULL when
 * the allocation fails or the computed length turns out to be too small.
 *
 * An entry's da_remotestr is best-effort and may be NULL when its own
 * allocation failed; such entries are skipped rather than passed to
 * strlen().
 */
static char *
nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
	struct nfs4_pnfs_ds_addr *da;
	char *remotestr;
	size_t len;
	char *p;

	len = 3; /* '{', '}' and eol */
	list_for_each_entry(da, dsaddrs, da_node) {
		if (!da->da_remotestr)
			continue;
		len += strlen(da->da_remotestr) + 1; /* string plus comma */
	}

	remotestr = kzalloc(len, gfp_flags);
	if (!remotestr)
		return NULL;

	p = remotestr;
	*(p++) = '{';
	len--;

	list_for_each_entry(da, dsaddrs, da_node) {
		size_t ll;

		if (!da->da_remotestr)
			continue;

		/* Re-check the space left in case a string changed length */
		ll = strlen(da->da_remotestr);
		if (ll > len)
			goto out_err;

		memcpy(p, da->da_remotestr, ll);
		p += ll;
		len -= ll;

		if (len < 1)
			goto out_err;
		(*p++) = ',';
		len--;
	}

	/* Room for the closing brace and the terminating NUL */
	if (len < 2)
		goto out_err;

	*(p++) = '}';
	*p = '\0';
	return remotestr;
out_err:
	kfree(remotestr);
	return NULL;
}
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
if (list_empty(dsaddrs)) {
dprintk("%s: no addresses defined\n", __func__);
goto out;
}
ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;
/* this is only used for debugging, so it's ok if it's NULL */
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
spin_lock(&nfs4_ds_cache_lock);
tmp_ds = _data_server_lookup_locked(ip_addr, port);
tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
ds->ds_ip_addr = ip_addr;
ds->ds_port = port;
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
dprintk("%s add new data server ip 0x%x\n", __func__,
ds->ds_ip_addr);
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
dsaddrs)) {
dprintk("%s: multipath address mismatch: %s != %s",
__func__, tmp_ds->ds_remotestr, remotestr);
}
kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_ip_addr,
dprintk("%s data server %s found, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
@ -213,18 +375,22 @@ nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
}
/*
* Currently only support ipv4, and one multi-path address.
* Currently only supports ipv4, ipv6 and one multi-path address.
*/
static struct nfs4_pnfs_ds *
decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
static struct nfs4_pnfs_ds_addr *
decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *ds = NULL;
char *buf;
const char *ipend, *pstr;
u32 ip_addr, port;
int nlen, rlen, i;
struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
u32 port;
int nlen, rlen;
int tmp[2];
__be32 *p;
char *netid, *match_netid;
size_t len, match_netid_len;
char *startsep = "";
char *endsep = "";
/* r_netid */
p = xdr_inline_decode(streamp, 4);
@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
if (unlikely(!p))
goto out_err;
/* Check that netid is "tcp" */
if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
netid = kmalloc(nlen+1, gfp_flags);
if (unlikely(!netid))
goto out_err;
}
/* r_addr */
netid[nlen] = '\0';
memcpy(netid, p, nlen);
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
goto out_err;
goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
goto out_err;
goto out_free_netid;
/* ipv6 length plus port is legal */
if (rlen > INET6_ADDRSTRLEN + 8) {
/* port is ".ABC.DEF", 8 chars max */
if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
goto out_err;
goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
goto out_err;
goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
/* replace the port dots with dashes for the in4_pton() delimiter*/
for (i = 0; i < 2; i++) {
char *res = strrchr(buf, '.');
if (!res) {
dprintk("%s: Failed finding expected dots in port\n",
__func__);
goto out_free;
}
*res = '-';
/* replace port '.' with '-' */
portstr = strrchr(buf, '.');
if (!portstr) {
dprintk("%s: Failed finding expected dot in port\n",
__func__);
goto out_free_buf;
}
*portstr = '-';
/* find '.' between address and port */
portstr = strrchr(buf, '.');
if (!portstr) {
dprintk("%s: Failed finding expected dot between address and "
"port\n", __func__);
goto out_free_buf;
}
*portstr = '\0';
da = kzalloc(sizeof(*da), gfp_flags);
if (unlikely(!da))
goto out_free_buf;
INIT_LIST_HEAD(&da->da_node);
if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
sizeof(da->da_addr))) {
dprintk("%s: error parsing address %s\n", __func__, buf);
goto out_free_da;
}
/* Currently only support ipv4 address */
if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
dprintk("%s: Only ipv4 addresses supported\n", __func__);
goto out_free;
}
/* port */
pstr = ipend;
sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
portstr++;
sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));
ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
dprintk("%s: Decoded address and port %s\n", __func__, buf);
out_free:
switch (da->da_addr.ss_family) {
case AF_INET:
((struct sockaddr_in *)&da->da_addr)->sin_port = port;
da->da_addrlen = sizeof(struct sockaddr_in);
match_netid = "tcp";
match_netid_len = 3;
break;
case AF_INET6:
((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
da->da_addrlen = sizeof(struct sockaddr_in6);
match_netid = "tcp6";
match_netid_len = 4;
startsep = "[";
endsep = "]";
break;
default:
dprintk("%s: unsupported address family: %u\n",
__func__, da->da_addr.ss_family);
goto out_free_da;
}
if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
__func__, netid, match_netid);
goto out_free_da;
}
/* save human readable address */
len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
da->da_remotestr = kzalloc(len, gfp_flags);
/* NULL is ok, only used for dprintk */
if (da->da_remotestr)
snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
buf, endsep, ntohs(port));
dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
kfree(netid);
return da;
out_free_da:
kfree(da);
out_free_buf:
dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
kfree(buf);
out_free_netid:
kfree(netid);
out_err:
return ds;
return NULL;
}
/* Decode opaque device data and return the result */
@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
NFS_SERVER(ino)->nfs_client,
&pdev->dev_id);
INIT_LIST_HEAD(&dsaddrs);
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
u32 mp_count;
@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
goto out_err_free_deviceid;
mp_count = be32_to_cpup(p); /* multipath count */
if (mp_count > 1) {
printk(KERN_WARNING
"%s: Multipath count %d not supported, "
"skipping all greater than 1\n", __func__,
mp_count);
}
for (j = 0; j < mp_count; j++) {
if (j == 0) {
dsaddr->ds_list[i] = decode_and_add_ds(&stream,
ino, gfp_flags);
if (dsaddr->ds_list[i] == NULL)
goto out_err_free_deviceid;
} else {
u32 len;
/* skip extra multipath */
da = decode_ds_addr(&stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
if (list_empty(&dsaddrs)) {
dprintk("%s: no suitable DS addresses found\n",
__func__);
goto out_err_free_deviceid;
}
/* read len, skip */
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
goto out_err_free_deviceid;
len = be32_to_cpup(p);
dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
p = xdr_inline_decode(&stream, len);
if (unlikely(!p))
goto out_err_free_deviceid;
/* read len, skip */
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
goto out_err_free_deviceid;
len = be32_to_cpup(p);
p = xdr_inline_decode(&stream, len);
if (unlikely(!p))
goto out_err_free_deviceid;
}
/* If DS was already in cache, free ds addrs */
while (!list_empty(&dsaddrs)) {
da = list_first_entry(&dsaddrs,
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
}
__free_page(scratch);
return dsaddr;
out_err_drain_dsaddrs:
while (!list_empty(&dsaddrs)) {
da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
}
out_err_free_deviceid:
nfs4_fl_free_deviceid(dsaddr);
/* stripe_indicies was part of dsaddr */
@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
int err, u32 ds_addr)
int err, const char *ds_remotestr)
{
u32 *p = (u32 *)&dsaddr->id_node.deviceid;
printk(KERN_ERR "NFS: data server %x connection error %d."
printk(KERN_ERR "NFS: data server %s connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
ds_addr, err, p[0], p[1], p[2], p[3]);
ds_remotestr, err, p[0], p[1], p[2], p[3]);
spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
err = nfs4_ds_connect(s, ds);
if (err) {
filelayout_mark_devid_negative(dsaddr, err,
ntohl(ds->ds_ip_addr));
ds->ds_remotestr);
return NULL;
}
}

View file

@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs4_state *state);
#ifdef CONFIG_NFS_V4_1
static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
#endif
/* Prevent leaks of NFSv4 errors into userland */
static int nfs4_map_errors(int err)
{
@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret;
}
#if defined(CONFIG_NFS_V4_1)
/*
 * NFSv4.1 flavour of open-state recovery: probe the stateid with
 * nfs41_test_stateid() first.  If that reports NFS_OK nothing needs to be
 * done; otherwise nfs41_free_stateid() is called (its result is ignored)
 * and recovery is delegated to nfs4_open_expired().
 */
static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
{
	struct nfs_server *server = NFS_SERVER(state->inode);

	if (nfs41_test_stateid(server, state) == NFS_OK)
		return 0;

	nfs41_free_stateid(server, state);
	return nfs4_open_expired(sp, state);
}
#endif
/*
* on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
* fields corresponding to attributes that were used to store the verifier.
@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int minor_version = server->nfs_client->cl_minorversion;
int status = nfs4_lookup_root(server, fhandle, info);
if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
/*
* A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
* by nfs4_map_errors() as this function exits.
*/
status = nfs4_find_root_sec(server, fhandle, info);
status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
if (status == 0)
status = nfs4_server_capabilities(server, fhandle);
if (status == 0)
@ -4441,6 +4459,20 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
return err;
}
#if defined(CONFIG_NFS_V4_1)
/*
 * NFSv4.1 flavour of lock-state recovery: probe the stateid with
 * nfs41_test_stateid() first.  If that reports NFS_OK nothing needs to be
 * done; otherwise nfs41_free_stateid() is called (its result is ignored)
 * and recovery is delegated to nfs4_lock_expired().
 */
static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
{
	struct nfs_server *server = NFS_SERVER(state->inode);

	if (nfs41_test_stateid(server, state) == NFS_OK)
		return 0;

	nfs41_free_stateid(server, state);
	return nfs4_lock_expired(state, request);
}
#endif
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
@ -4779,6 +4811,16 @@ static int nfs4_check_cl_exchange_flags(u32 flags)
return -NFS4ERR_INVAL;
}
/*
 * Two server scopes are the same when they have equal length and
 * byte-identical contents.
 */
static bool
nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
{
	if (a->server_scope_sz != b->server_scope_sz)
		return false;

	return memcmp(a->server_scope, b->server_scope,
		      a->server_scope_sz) == 0;
}
/*
* nfs4_proc_exchange_id()
*
@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
init_utsname()->domainname,
clp->cl_rpcclient->cl_auth->au_flavor);
res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
if (unlikely(!res.server_scope))
return -ENOMEM;
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (!status)
status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
if (!status) {
if (clp->server_scope &&
!nfs41_same_server_scope(clp->server_scope,
res.server_scope)) {
dprintk("%s: server_scope mismatch detected\n",
__func__);
set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
kfree(clp->server_scope);
clp->server_scope = NULL;
}
if (!clp->server_scope)
clp->server_scope = res.server_scope;
else
kfree(res.server_scope);
}
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
struct nfs_server *server;
struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
struct pnfs_layout_hdr *lo = lrp->args.layout;
dprintk("--> %s\n", __func__);
@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s\n", __func__);
put_layout_hdr(NFS_I(lrp->args.inode)->layout);
put_layout_hdr(lrp->args.layout);
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@ -5901,6 +5965,143 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
rpc_put_task(task);
return status;
}
/*
 * Issue one SECINFO_NO_NAME call (style SECINFO_STYLE_CURRENT_FH) with
 * @flavors as the result buffer.  @fhandle and @info are accepted but not
 * used by this function.  Returns whatever nfs4_call_sync() returns.
 */
static int
_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
			    struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
{
	struct nfs41_secinfo_no_name_args args = {
		.style = SECINFO_STYLE_CURRENT_FH,
	};
	struct nfs4_secinfo_res res = {
		.flavors = flavors,
	};
	struct rpc_message msg = {
		.rpc_resp = &res,
		.rpc_argp = &args,
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
	};
	int status;

	status = nfs4_call_sync(server->client, server, &msg,
				&args.seq_args, &res.seq_res, 0);
	return status;
}
/*
 * Issue SECINFO_NO_NAME, letting nfs4_handle_exception() decide whether
 * other errors should be retried.
 *
 * Returns 0, -NFS4ERR_WRONGSEC or -NFS4ERR_NOTSUPP unchanged as soon as
 * the call produces one of them; any other result is whatever
 * nfs4_handle_exception() maps it to once no retry is requested.
 */
static int
nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
			   struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
{
	struct nfs4_exception exception = { };
	int err;

	do {
		err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
		switch (err) {
		case 0:
		case -NFS4ERR_WRONGSEC:
		case -NFS4ERR_NOTSUPP:
			/*
			 * Final answers: leave the function right here.  A
			 * "break" would only exit the switch, and a retry
			 * flag left set by an earlier iteration would then
			 * make the loop send the request again.
			 */
			return err;
		default:
			err = nfs4_handle_exception(server, err, &exception);
		}
	} while (exception.retry);
	return err;
}
static int
nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int err;
struct page *page;
rpc_authflavor_t flavor;
struct nfs4_secinfo_flavors *flavors;
page = alloc_page(GFP_KERNEL);
if (!page) {
err = -ENOMEM;
goto out;
}
flavors = page_address(page);
err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
/*
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
if (err)
goto out_freepage;
flavor = nfs_find_best_sec(flavors);
if (err == 0)
err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
out_freepage:
put_page(page);
if (err == -EACCES)
return -EPERM;
out:
return err;
}
/*
 * Send a single TEST_STATEID request for state->stateid.  Both session
 * pointers of the sequence argument/result are cleared before the call.
 * Returns the nfs4_call_sync_sequence() result.
 */
static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
{
	struct nfs41_test_stateid_args args = {
		.stateid = &state->stateid,
	};
	struct nfs41_test_stateid_res res;
	struct rpc_message msg = {
		.rpc_resp = &res,
		.rpc_argp = &args,
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
	};

	res.seq_res.sr_session = NULL;
	args.seq_args.sa_session = NULL;

	return nfs4_call_sync_sequence(server->client, server, &msg,
				       &args.seq_args, &res.seq_res, 0, 1);
}
/*
 * TEST_STATEID with retry: every raw result is passed through
 * nfs4_handle_exception(), and the call is repeated for as long as that
 * helper asks for a retry.  Returns the last value it produced.
 */
static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
{
	struct nfs4_exception exception = { };
	int err;

	do {
		int raw = _nfs41_test_stateid(server, state);

		err = nfs4_handle_exception(server, raw, &exception);
	} while (exception.retry);

	return err;
}
/*
 * Send a single FREE_STATEID request for state->stateid.  Both session
 * pointers of the sequence argument/result are cleared before the call.
 * Returns the nfs4_call_sync_sequence() result.
 */
static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
{
	struct nfs41_free_stateid_args args = {
		.stateid = &state->stateid,
	};
	struct nfs41_free_stateid_res res;
	struct rpc_message msg = {
		.rpc_resp = &res,
		.rpc_argp = &args,
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
	};

	res.seq_res.sr_session = NULL;
	args.seq_args.sa_session = NULL;

	return nfs4_call_sync_sequence(server->client, server, &msg,
				       &args.seq_args, &res.seq_res, 0, 1);
}
/*
 * FREE_STATEID with retry: every raw result is passed through
 * nfs4_handle_exception(), and the call is repeated for as long as that
 * helper asks for a retry.  Returns the last value it produced.
 */
static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
{
	struct nfs4_exception exception = { };
	int err;

	do {
		int raw = _nfs4_free_stateid(server, state);

		err = nfs4_handle_exception(server, raw, &exception);
	} while (exception.retry);

	return err;
}
#endif /* CONFIG_NFS_V4_1 */
struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
.recover_open = nfs4_open_expired,
.recover_lock = nfs4_lock_expired,
.recover_open = nfs41_open_expired,
.recover_lock = nfs41_lock_expired,
.establish_clid = nfs41_init_clientid,
.get_clid_cred = nfs4_get_exchange_id_cred,
};
@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
.call_sync = _nfs4_call_sync,
.validate_stateid = nfs4_validate_delegation_stateid,
.find_root_sec = nfs4_find_root_sec,
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
.state_renewal_ops = &nfs40_state_renewal_ops,
@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.minor_version = 1,
.call_sync = _nfs4_call_sync_session,
.validate_stateid = nfs41_validate_delegation_stateid,
.find_root_sec = nfs41_find_root_sec,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,

View file

@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
goto out_error;
}
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
&clp->cl_state))
nfs4_state_start_reclaim_nograce(clp);
else
set_bit(NFS4CLNT_RECLAIM_REBOOT,
&clp->cl_state);
pnfs_destroy_all_layouts(clp);
}

View file

@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int);
1 /* FIXME: opaque lrf_body always empty at the moment */)
#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
1 + decode_stateid_maxsz)
/*
 * Per-operation size estimates for the NFSv4.1 operations SECINFO_NO_NAME,
 * TEST_STATEID and FREE_STATEID (presumably counted in 4-byte XDR words,
 * cf. XDR_QUADLEN -- confirm against the other *_maxsz macros).
 */
/* SECINFO_NO_NAME: op header plus one word; the reply is sized like SECINFO's */
#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
/* TEST_STATEID: op header, two extra words and one stateid / header plus three words */
#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
/* FREE_STATEID: op header, one extra word and one stateid / header plus one word */
#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int);
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz)
/*
 * Whole-compound size estimates.  Each one is the compound header plus the
 * per-operation estimates of the operations listed in the macro body.
 */
/* SECINFO_NO_NAME compound: SEQUENCE, PUTROOTFH, SECINFO_NO_NAME */
#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putrootfh_maxsz +\
encode_secinfo_no_name_maxsz)
#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putrootfh_maxsz + \
decode_secinfo_no_name_maxsz)
/* TEST_STATEID compound: SEQUENCE, TEST_STATEID */
#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_test_stateid_maxsz)
#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_test_stateid_maxsz)
/* FREE_STATEID compound: SEQUENCE, FREE_STATEID */
#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_free_stateid_maxsz)
#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_free_stateid_maxsz)
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
compound_encode_hdr_maxsz +
@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr,
hdr->nops++;
hdr->replen += decode_layoutreturn_maxsz;
}
/*
 * Append a SECINFO_NO_NAME operation: the opcode followed by args->style,
 * each as one big-endian 32-bit word.  Bumps the compound's op count and
 * expected reply length.  Always returns 0.
 */
static int
encode_secinfo_no_name(struct xdr_stream *xdr,
		       const struct nfs41_secinfo_no_name_args *args,
		       struct compound_hdr *hdr)
{
	/* two 4-byte words: opcode + style */
	__be32 *p = reserve_space(xdr, 8);

	p[0] = cpu_to_be32(OP_SECINFO_NO_NAME);
	p[1] = cpu_to_be32(args->style);

	hdr->replen += decode_secinfo_no_name_maxsz;
	hdr->nops++;
	return 0;
}
/*
 * Append a TEST_STATEID operation: the opcode, the constant 1 and then
 * the NFS4_STATEID_SIZE bytes of args->stateid.  Bumps the compound's op
 * count and expected reply length.
 */
static void encode_test_stateid(struct xdr_stream *xdr,
				struct nfs41_test_stateid_args *args,
				struct compound_hdr *hdr)
{
	/* two 4-byte words followed by the stateid itself */
	__be32 *p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);

	p[0] = cpu_to_be32(OP_TEST_STATEID);
	p[1] = cpu_to_be32(1);
	xdr_encode_opaque_fixed(p + 2, args->stateid->data, NFS4_STATEID_SIZE);

	hdr->replen += decode_test_stateid_maxsz;
	hdr->nops++;
}
/*
 * Append a FREE_STATEID operation: the opcode followed by the
 * NFS4_STATEID_SIZE bytes of args->stateid.  Bumps the compound's op
 * count and expected reply length.
 */
static void encode_free_stateid(struct xdr_stream *xdr,
				struct nfs41_free_stateid_args *args,
				struct compound_hdr *hdr)
{
	/* one 4-byte word followed by the stateid itself */
	__be32 *p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);

	p[0] = cpu_to_be32(OP_FREE_STATEID);
	xdr_encode_opaque_fixed(p + 1, args->stateid->data, NFS4_STATEID_SIZE);

	hdr->replen += decode_free_stateid_maxsz;
	hdr->nops++;
}
#endif /* CONFIG_NFS_V4_1 */
/*
@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
encode_layoutreturn(xdr, args, &hdr);
encode_nops(&hdr);
}
/*
 * Encode a SECINFO_NO_NAME compound:
 * SEQUENCE, PUTROOTFH, SECINFO_NO_NAME.  Always returns 0.
 */
static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
					struct xdr_stream *xdr,
					struct nfs41_secinfo_no_name_args *args)
{
	struct compound_hdr hdr = { };

	hdr.minorversion = nfs4_xdr_minorversion(&args->seq_args);

	encode_compound_hdr(xdr, req, &hdr);
	encode_sequence(xdr, &args->seq_args, &hdr);
	encode_putrootfh(xdr, &hdr);
	encode_secinfo_no_name(xdr, args, &hdr);
	encode_nops(&hdr);
	return 0;
}
/*
 * Encode a TEST_STATEID compound: SEQUENCE, TEST_STATEID.
 */
static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
				      struct xdr_stream *xdr,
				      struct nfs41_test_stateid_args *args)
{
	struct compound_hdr hdr = { };

	hdr.minorversion = nfs4_xdr_minorversion(&args->seq_args);

	encode_compound_hdr(xdr, req, &hdr);
	encode_sequence(xdr, &args->seq_args, &hdr);
	encode_test_stateid(xdr, args, &hdr);
	encode_nops(&hdr);
}
/*
 * Encode a FREE_STATEID compound: SEQUENCE, FREE_STATEID.
 */
static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
				      struct xdr_stream *xdr,
				      struct nfs41_free_stateid_args *args)
{
	struct compound_hdr hdr = { };

	hdr.minorversion = nfs4_xdr_minorversion(&args->seq_args);

	encode_compound_hdr(xdr, req, &hdr);
	encode_sequence(xdr, &args->seq_args, &hdr);
	encode_free_stateid(xdr, args, &hdr);
	encode_nops(&hdr);
}
#endif /* CONFIG_NFS_V4_1 */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
if (unlikely(status))
return status;
/* Throw away server_scope */
/* Save server_scope */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
return -EIO;
memcpy(res->server_scope->server_scope, dummy_str, dummy);
res->server_scope->server_scope_sz = dummy;
/* Throw away Implementation id array */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
@ -5322,6 +5449,55 @@ static int decode_layoutcommit(struct xdr_stream *xdr,
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_test_stateid(struct xdr_stream *xdr,
struct nfs41_test_stateid_res *res)
{
__be32 *p;
int status;
int num_res;
status = decode_op_hdr(xdr, OP_TEST_STATEID);
if (status)
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
num_res = be32_to_cpup(p++);
if (num_res != 1)
goto out;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
res->status = be32_to_cpup(p++);
return res->status;
out_overflow:
print_overflow_msg(__func__, xdr);
out:
return -EIO;
}
static int decode_free_stateid(struct xdr_stream *xdr,
struct nfs41_free_stateid_res *res)
{
__be32 *p;
int status;
status = decode_op_hdr(xdr, OP_FREE_STATEID);
if (status)
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
res->status = be32_to_cpup(p++);
return res->status;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
#endif /* CONFIG_NFS_V4_1 */
/*
@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
out:
return status;
}
/*
 * Decode a SECINFO_NO_NAME reply: compound header, SEQUENCE, PUTROOTFH
 * and finally the SECINFO body.  Decoding stops at the first step that
 * fails and that step's status is returned.
 */
static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
					struct xdr_stream *xdr,
					struct nfs4_secinfo_res *res)
{
	struct compound_hdr hdr;
	int status;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status)
		status = decode_putrootfh(xdr);
	if (!status)
		status = decode_secinfo(xdr, res);
	return status;
}
/*
 * Decode a TEST_STATEID reply: compound header, SEQUENCE, TEST_STATEID.
 * Decoding stops at the first step that fails and that step's status is
 * returned.
 */
static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
				     struct xdr_stream *xdr,
				     struct nfs41_test_stateid_res *res)
{
	struct compound_hdr hdr;
	int status;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status)
		status = decode_test_stateid(xdr, res);
	return status;
}
/*
 * Decode a FREE_STATEID reply: compound header, SEQUENCE, FREE_STATEID.
 * Decoding stops at the first step that fails and that step's status is
 * returned.
 */
static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
				     struct xdr_stream *xdr,
				     struct nfs41_free_stateid_res *res)
{
	struct compound_hdr hdr;
	int status;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status)
		status = decode_free_stateid(xdr, res);
	return status;
}
#endif /* CONFIG_NFS_V4_1 */
/**
@ -6663,6 +6905,9 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
#endif /* CONFIG_NFS_V4_1 */
};

View file

@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;
if (pgio->pg_lseg == NULL)
return true;
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
/*
 * Page I/O operations for reads: the generic pNFS init and readpages
 * helpers combined with this driver's own coalescing test.
 */
static const struct nfs_pageio_ops objio_pg_read_ops = {
.pg_init = pnfs_generic_pg_init_read,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
/*
 * Page I/O operations for writes: the generic pNFS init and writepages
 * helpers combined with this driver's own coalescing test.
 */
static const struct nfs_pageio_ops objio_pg_write_ops = {
.pg_init = pnfs_generic_pg_init_write,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
.read_pagelist = objlayout_read_pagelist,
.write_pagelist = objlayout_write_pagelist,
.pg_test = objio_pg_test,
.pg_read_ops = &objio_pg_read_ops,
.pg_write_ops = &objio_pg_write_ops,
.free_deviceid_node = objio_free_deviceid_node,
@ -1055,5 +1065,7 @@ objlayout_exit(void)
__func__);
}
MODULE_ALIAS("nfs-layouttype4-2");
module_init(objlayout_init);
module_exit(objlayout_exit);

View file

@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
int (*doio)(struct nfs_pageio_descriptor *),
const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int io_flags)
{
@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_bsize = bsize;
desc->pg_base = 0;
desc->pg_moreio = 0;
desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_doio = doio;
desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_test = nfs_generic_pg_test;
pnfs_pageio_init(desc, inode);
}
/**
@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
return pgio->pg_test(pgio, prev, req);
return pgio->pg_ops->pg_test(pgio, prev, req);
}
/**
@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
nfs_list_remove_request(req);
@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
int error = desc->pg_doio(desc);
int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
while (!nfs_pageio_do_add_request(desc, req)) {
@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_error < 0)
return 0;
desc->pg_moreio = 0;
if (desc->pg_recoalesce)
return 0;
}
return 1;
}
/*
 * Take every request currently queued on @desc off its list and feed the
 * requests back through __nfs_pageio_add_request(), repeating for as long
 * as the descriptor has pg_recoalesce set.
 *
 * Returns 1 once the loop ends with pg_recoalesce clear, or 0 if adding a
 * request failed while desc->pg_error is negative.
 */
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
do {
/* Move the queued requests to a private list ... */
list_splice_init(&desc->pg_list, &head);
/* ... and reset the descriptor's accounting for them. */
desc->pg_bytes_written -= desc->pg_count;
desc->pg_count = 0;
desc->pg_base = 0;
desc->pg_recoalesce = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
req = list_first_entry(&head, struct nfs_page, wb_list);
nfs_list_remove_request(req);
if (__nfs_pageio_add_request(desc, req))
continue;
/* The add failed: give up on a hard error ... */
if (desc->pg_error < 0)
return 0;
/* ... otherwise leave the inner loop and re-check pg_recoalesce. */
break;
}
} while (desc->pg_recoalesce);
return 1;
}
/*
 * Add @req to @desc, re-coalescing the descriptor's queued requests and
 * trying again whenever a plain add does not succeed.
 *
 * Returns non-zero once the request has been added.  Returns 0 if
 * desc->pg_error is negative after a failed add, or if
 * nfs_do_recoalesce() fails.
 */
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
			   struct nfs_page *req)
{
	int ret;

	for (;;) {
		ret = __nfs_pageio_add_request(desc, req);
		if (ret != 0 || desc->pg_error < 0)
			return ret;

		ret = nfs_do_recoalesce(desc);
		if (ret == 0)
			return ret;
	}
}
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
* @desc: pointer to io descriptor
*/
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
nfs_pageio_doio(desc);
for (;;) {
nfs_pageio_doio(desc);
if (!desc->pg_recoalesce)
break;
if (!nfs_do_recoalesce(desc))
break;
}
}
/**
@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
if (!list_empty(&desc->pg_list)) {
struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
if (index != prev->wb_index + 1)
nfs_pageio_doio(desc);
nfs_pageio_complete(desc);
}
}

View file

@ -28,6 +28,7 @@
*/
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
struct nfs_server *server;
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);
spin_lock(&clp->cl_lock);
list_splice_init(&clp->cl_layouts, &tmp_list);
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if (!list_empty(&server->layouts))
list_splice_init(&server->layouts, &tmp_list);
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
while (!list_empty(&tmp_list)) {
@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino)
lrp->args.stateid = stateid;
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
lrp->args.inode = ino;
lrp->args.layout = lo;
lrp->clp = NFS_SERVER(ino)->nfs_client;
status = nfs4_proc_layoutreturn(lrp);
@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
};
unsigned pg_offset;
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct nfs_server *server = NFS_SERVER(ino);
struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
bool first = false;
@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
*/
spin_lock(&clp->cl_lock);
BUG_ON(!list_empty(&lo->plh_layouts));
list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
list_add_tail(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
arg.offset -= pg_offset;
arg.length += pg_offset;
}
arg.length = PAGE_CACHE_ALIGN(arg.length);
if (arg.length != NFS4_MAX_UINT64)
arg.length = PAGE_CACHE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
if (!lseg && first) {
@ -991,6 +1004,7 @@ pnfs_update_layout(struct inode *ino,
spin_unlock(&ino->i_lock);
goto out;
}
EXPORT_SYMBOL_GPL(pnfs_update_layout);
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
@ -1048,35 +1062,71 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out;
}
/*
 * pg_init hook for reads: ask pnfs_update_layout() for an IOMODE_READ
 * layout segment covering @req and record it in pgio->pg_lseg.  If none
 * is returned the descriptor is reset to read through the MDS.
 * The descriptor must not already carry a layout segment.
 */
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	struct pnfs_layout_segment *lseg;

	BUG_ON(pgio->pg_lseg != NULL);

	lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context,
				  req_offset(req), req->wb_bytes,
				  IOMODE_READ, GFP_KERNEL);
	pgio->pg_lseg = lseg;
	if (lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
/*
 * pg_init hook for writes: ask pnfs_update_layout() for an IOMODE_RW
 * layout segment covering @req (allocating with GFP_NOFS) and record it
 * in pgio->pg_lseg.  If none is returned the descriptor is reset to
 * write through the MDS.
 * The descriptor must not already carry a layout segment.
 */
void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	struct pnfs_layout_segment *lseg;

	BUG_ON(pgio->pg_lseg != NULL);

	lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context,
				  req_offset(req), req->wb_bytes,
				  IOMODE_RW, GFP_NOFS);
	pgio->pg_lseg = lseg;
	if (lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
/*
 * Initialise @pgio for reading with the current layout driver's
 * pg_read_ops and the server's rsize.  Returns false, leaving @pgio
 * untouched, when the server has no layout driver; true otherwise.
 */
bool
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (ld != NULL) {
		nfs_pageio_init(pgio, inode, ld->pg_read_ops,
				server->rsize, 0);
		return true;
	}
	return false;
}
/*
 * Initialise @pgio for writing with the current layout driver's
 * pg_write_ops, the server's wsize and @ioflags.  Returns false, leaving
 * @pgio untouched, when the server has no layout driver; true otherwise.
 */
bool
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (ld != NULL) {
		nfs_pageio_init(pgio, inode, ld->pg_write_ops,
				server->wsize, ioflags);
		return true;
	}
	return false;
}
bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
enum pnfs_iomode access_type;
gfp_t gfp_flags;
/* We assume that pg_ioflags == 0 iff we're reading a page */
if (pgio->pg_ioflags == 0) {
access_type = IOMODE_READ;
gfp_flags = GFP_KERNEL;
} else {
access_type = IOMODE_RW;
gfp_flags = GFP_NOFS;
}
if (pgio->pg_lseg == NULL) {
if (pgio->pg_count != prev->wb_bytes)
return true;
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
req_offset(prev),
pgio->pg_count,
access_type,
gfp_flags);
if (pgio->pg_lseg == NULL)
return true;
}
if (pgio->pg_lseg == NULL)
return nfs_generic_pg_test(pgio, prev, req);
/*
* Test if a nfs_page is fully contained in the pnfs_layout_range.
@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
enum pnfs_try_status
/*
 * Hand the pages of a write that was not attempted through pNFS back to
 * @desc so they can be written through the MDS instead, then release
 * @data.
 */
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_write_data *data)
{
/* Put the requests back at the tail of the descriptor's list. */
list_splice_tail_init(&data->pages, &desc->pg_list);
/* data->req is queued too, but only if it is not on a list already. */
if (data->req && list_empty(&data->req->wb_list))
nfs_list_add_request(data->req, &desc->pg_list);
nfs_pageio_reset_write_mds(desc);
/* Flag the descriptor so its queued requests get re-coalesced. */
desc->pg_recoalesce = 1;
nfs_writedata_release(data);
}
static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
const struct rpc_call_ops *call_ops, int how)
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
wdata->mds_ops = call_ops;
wdata->lseg = get_lseg(lseg);
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
return trypnfs;
}
/*
 * Submit every nfs_write_data on @head through pNFS using the layout
 * segment currently attached to @desc.  The segment is detached from the
 * descriptor up front and put once all entries have been handled.
 * Entries the layout driver does not attempt are handed to
 * pnfs_write_through_mds().
 */
static void
pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;

	desc->pg_lseg = NULL;
	while (!list_empty(head)) {
		struct nfs_write_data *data;

		data = list_first_entry(head, struct nfs_write_data, list);
		list_del_init(&data->list);

		if (pnfs_try_to_write_data(data, call_ops, lseg, how) ==
		    PNFS_NOT_ATTEMPTED)
			pnfs_write_through_mds(desc, data);
	}
	put_lseg(lseg);
}
/*
 * pg_doio hook for writes: build the write data with nfs_generic_flush()
 * and submit it via pnfs_do_multiple_writes().  If building fails, the
 * descriptor's layout segment is put and cleared and the error returned;
 * otherwise returns 0.
 */
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	int ret = nfs_generic_flush(desc, &head);

	if (ret == 0) {
		pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
		return 0;
	}

	put_lseg(desc->pg_lseg);
	desc->pg_lseg = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
/*
* Called by non rpc-based layout drivers
*/
@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
/*
 * Hand the pages of a read that was not attempted through pNFS back to
 * @desc so they can be read through the MDS instead, then release @data.
 */
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_read_data *data)
{
/* Put the requests back at the tail of the descriptor's list. */
list_splice_tail_init(&data->pages, &desc->pg_list);
/* data->req is queued too, but only if it is not on a list already. */
if (data->req && list_empty(&data->req->wb_list))
nfs_list_add_request(data->req, &desc->pg_list);
nfs_pageio_reset_read_mds(desc);
/* Flag the descriptor so its queued requests get re-coalesced. */
desc->pg_recoalesce = 1;
nfs_readdata_release(data);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
enum pnfs_try_status
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
const struct rpc_call_ops *call_ops)
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
rdata->mds_ops = call_ops;
rdata->lseg = get_lseg(lseg);
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
return trypnfs;
}
/*
 * Submit every nfs_read_data on @head through pNFS using the layout
 * segment currently attached to @desc.  The segment is detached from the
 * descriptor up front and put once all entries have been handled.
 * Entries the layout driver does not attempt are handed to
 * pnfs_read_through_mds().
 */
static void
pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;

	desc->pg_lseg = NULL;
	while (!list_empty(head)) {
		struct nfs_read_data *data;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		if (pnfs_try_to_read_data(data, call_ops, lseg) ==
		    PNFS_NOT_ATTEMPTED)
			pnfs_read_through_mds(desc, data);
	}
	put_lseg(lseg);
}
/*
 * pg_doio hook for reads: build the read data with nfs_generic_pagein()
 * and submit it via pnfs_do_multiple_reads().  If building fails, the
 * descriptor's layout segment is put and cleared and the error returned;
 * otherwise returns 0.
 */
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	int ret = nfs_generic_pagein(desc, &head);

	if (ret == 0) {
		pnfs_do_multiple_reads(desc, &head);
		return 0;
	}

	put_lseg(desc->pg_lseg);
	desc->pg_lseg = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
/*
* Currently there is only one (whole file) write lseg.
*/

View file

@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
void (*free_lseg) (struct pnfs_layout_segment *lseg);
/* test for nfs page cache coalescing */
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
const struct nfs_pageio_ops *pg_read_ops;
const struct nfs_pageio_ops *pg_write_ops;
/* Returns true if layoutdriver wants to divert this request to
* driver's commit routine.
@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
loff_t pos, u64 count, enum pnfs_iomode access_type,
gfp_t gfp_flags);
bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_ld_write_done(struct nfs_write_data *);
int pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
u64 count,
enum pnfs_iomode iomode,
gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
};
/* pnfs_dev.c */
struct nfs4_deviceid_node {
@ -189,13 +203,13 @@ struct nfs4_deviceid_node {
struct hlist_node tmpnode;
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
unsigned long flags;
struct nfs4_deviceid deviceid;
atomic_t ref;
};
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
const struct pnfs_layoutdriver_type *,
@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld)
pgio->pg_test = ld->pg_test;
}
#else /* CONFIG_NFS_V4_1 */
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}
static inline struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
loff_t pos, u64 count, enum pnfs_iomode access_type,
gfp_t gfp_flags)
{
return NULL;
}
static inline enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
return PNFS_NOT_ATTEMPTED;
}
static inline enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
{
return PNFS_NOT_ATTEMPTED;
}
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
/*
 * Stub for builds without CONFIG_NFS_V4_1: never claims the descriptor,
 * so the caller is expected to perform its own initialisation.
 */
static inline bool
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
		      struct inode *inode)
{
	/* Nothing was set up on @pgio. */
	return false;
}
/*
 * Stub for builds without CONFIG_NFS_V4_1: never claims the descriptor,
 * so the caller is expected to perform its own initialisation.
 * @ioflags is accepted for signature compatibility and ignored.
 */
static inline bool
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
		       struct inode *inode,
		       int ioflags)
{
	/* Nothing was set up on @pgio. */
	return false;
}
static inline void

View file

@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
rcu_read_lock();
d = _lookup_deviceid(ld, clp, id, hash);
if (d && !atomic_inc_not_zero(&d->ref))
d = NULL;
if (d != NULL)
atomic_inc(&d->ref);
rcu_read_unlock();
return d;
}
@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
/*
* Unhash and put deviceid
* Remove a deviceid from cache
*
* @clp nfs_client associated with deviceid
* @id the deviceid to unhash
*
* @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
*/
struct nfs4_deviceid_node *
nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
void
nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
const struct nfs_client *clp, const struct nfs4_deviceid *id)
{
struct nfs4_deviceid_node *d;
@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
rcu_read_unlock();
if (!d) {
spin_unlock(&nfs4_deviceid_lock);
return NULL;
return;
}
hlist_del_init_rcu(&d->node);
spin_unlock(&nfs4_deviceid_lock);
@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
/* balance the initial ref set in pnfs_insert_deviceid */
if (atomic_dec_and_test(&d->ref))
return d;
return NULL;
}
EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
/*
* Delete a deviceid from cache
*
* @clp struct nfs_client qualifying the deviceid
* @id deviceid to delete
*/
void
nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
const struct nfs_client *clp, const struct nfs4_deviceid *id)
{
struct nfs4_deviceid_node *d;
d = nfs4_unhash_put_deviceid(ld, clp, id);
if (!d)
return;
d->ld->free_deviceid_node(d);
d->ld->free_deviceid_node(d);
}
EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
INIT_HLIST_NODE(&d->tmpnode);
d->ld = ld;
d->nfs_client = nfs_client;
d->flags = 0;
d->deviceid = *id;
atomic_set(&d->ref, 1);
}
@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
*
* @d deviceid node to put
*
* @ret true iff the node was deleted
* return true iff the node was deleted
* Note that the test for d->ref == 0 is sufficient to establish
* that the node is no longer hashed in the global device id cache.
*/
bool
nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
{
if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
if (!atomic_dec_and_test(&d->ref))
return false;
hlist_del_init_rcu(&d->node);
spin_unlock(&nfs4_deviceid_lock);
synchronize_rcu();
d->ld->free_deviceid_node(d);
return true;
}
@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
_deviceid_purge_client(clp, h);
}
/*
* Stop use of all deviceids associated with an nfs_client
*/
void
nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
{
struct nfs4_deviceid_node *d;
struct hlist_node *n;
int i;
rcu_read_lock();
for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
if (d->nfs_client == clp)
set_bit(NFS_DEVICEID_INVALID, &d->flags);
}
rcu_read_unlock();
}

View file

@ -30,8 +30,7 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
mempool_free(p, nfs_rdata_mempool);
}
static void nfs_readdata_release(struct nfs_read_data *rdata)
void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}
static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
NFS_SERVER(inode)->rsize, 0);
}
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_read_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
/*
 * Initialise a read descriptor: let pnfs_pageio_init_read() set it up
 * first, and fall back to nfs_pageio_init_read_mds() when it declines.
 */
static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
				 struct inode *inode)
{
	if (pnfs_pageio_init_read(pgio, inode))
		return;
	nfs_pageio_init_read_mds(pgio, inode);
}
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
nfs_pageio_init(&pgio, inode, NULL, 0, 0);
nfs_list_add_request(new, &pgio.pg_list);
pgio.pg_count = len;
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
nfs_pagein_multi(&pgio);
else
nfs_pagein_one(&pgio);
nfs_pageio_init_read(&pgio, inode);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
return 0;
}
@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
struct pnfs_layout_segment *lseg)
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
struct inode *inode = req->wb_context->dentry->d_inode;
data->req = req;
data->inode = inode;
data->cred = req->wb_context->cred;
data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.count = count;
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
}
if (data->lseg &&
(pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
return 0;
/*
 * Start one prepared read through nfs_initiate_read(), using the
 * client of the inode behind the request's open context.
 * Returns whatever nfs_initiate_read() returns.
 */
static int nfs_do_read(struct nfs_read_data *data,
		       const struct rpc_call_ops *call_ops)
{
	return nfs_initiate_read(data,
				 NFS_CLIENT(data->args.context->dentry->d_inode),
				 call_ops);
}
/*
 * Drain @head, issuing every queued nfs_read_data via nfs_do_read().
 * All entries are issued even after a failure; the first non-zero
 * status seen is returned, or 0 when every read was started.
 */
static int
nfs_do_multiple_reads(struct list_head *head,
		      const struct rpc_call_ops *call_ops)
{
	int first_err = 0;

	for (;;) {
		struct nfs_read_data *rdata;
		int status;

		if (list_empty(head))
			break;
		rdata = list_entry(head->next, struct nfs_read_data, list);
		list_del_init(&rdata->list);
		status = nfs_do_read(rdata, call_ops);
		/* Keep only the first failure; later ones are dropped. */
		if (first_err == 0)
			first_err = status;
	}
	return first_err;
}
static void
nfs_async_read_error(struct list_head *head)
{
@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
list_add(&data->pages, &list);
data->pagevec[0] = page;
nfs_read_rpcsetup(req, data, len, offset);
list_add(&data->list, res);
requests++;
nbytes -= len;
offset += len;
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg != NULL);
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
req_offset(req), desc->pg_count,
IOMODE_READ, GFP_KERNEL);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
do {
int ret2;
data = list_entry(list.next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->pagevec[0] = page;
if (nbytes < rsize)
rsize = nbytes;
ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
rsize, offset, lseg);
if (ret == 0)
ret = ret2;
offset += rsize;
nbytes -= rsize;
} while (nbytes != 0);
put_lseg(lseg);
desc->pg_lseg = NULL;
desc->pg_rpc_callops = &nfs_read_partial_ops;
return ret;
out_bad:
while (!list_empty(&list)) {
data = list_entry(list.next, struct nfs_read_data, pages);
list_del(&data->pages);
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_read_data, list);
list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
@ -327,19 +336,19 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
return -ENOMEM;
}
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;
int ret = 0;
data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
nfs_async_read_error(head);
ret = -ENOMEM;
goto out;
}
@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
req_offset(req), desc->pg_count,
IOMODE_READ, GFP_KERNEL);
ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
0, lseg);
nfs_read_rpcsetup(req, data, desc->pg_count, 0);
list_add(&data->list, res);
desc->pg_rpc_callops = &nfs_read_full_ops;
out:
put_lseg(lseg);
desc->pg_lseg = NULL;
return ret;
}
/*
 * Build the read request(s) for @desc onto @head.
 * A block size below PAGE_CACHE_SIZE takes the multi-request path;
 * otherwise a single request is built.
 */
int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	if (desc->pg_bsize >= PAGE_CACHE_SIZE)
		return nfs_pagein_one(desc, head);
	return nfs_pagein_multi(desc, head);
}
static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;
ret = nfs_generic_pagein(desc, &head);
if (ret == 0)
ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
return ret;
}
/* Generic read-side page I/O operations; pg_init is left unset. */
static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_doio = nfs_generic_pg_readpages,
	.pg_test = nfs_generic_pg_test,
};
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
.pgio = &pgio,
};
struct inode *inode = mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
size_t rsize = server->rsize;
unsigned long npages;
int ret = -ESTALE;
@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
nfs_pageio_init_read(&pgio, inode);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

View file

@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
alias = d_lookup(parent, &data->args.name);
if (alias != NULL) {
int ret = 0;
int ret;
void *devname_garbage = NULL;
/*
@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
* the sillyrename information to the aliased dentry.
*/
nfs_free_dname(data);
ret = nfs_copy_dname(alias, data);
spin_lock(&alias->d_lock);
if (alias->d_inode != NULL &&
if (ret == 0 && alias->d_inode != NULL &&
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
devname_garbage = alias->d_fsdata;
alias->d_fsdata = data;
alias->d_flags |= DCACHE_NFSFS_RENAMED;
ret = 1;
}
} else
ret = 0;
spin_unlock(&alias->d_lock);
nfs_dec_sillycount(dir);
dput(alias);
@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
* point dentry is definitely not a root, so we won't need
* that anymore.
*/
if (devname_garbage)
kfree(devname_garbage);
kfree(devname_garbage);
return ret;
}
data->dir = igrab(dir);
@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
if (parent == NULL)
goto out_free;
dir = parent->d_inode;
if (nfs_copy_dname(dentry, data) != 0)
goto out_dput;
/* Non-exclusive lock protects against concurrent lookup() calls */
spin_lock(&dir->i_lock);
if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
struct nfs_renamedata *data = calldata;
struct inode *old_dir = data->old_dir;
struct inode *new_dir = data->new_dir;
struct dentry *old_dentry = data->old_dentry;
struct dentry *new_dentry = data->new_dentry;
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
}
if (task->tk_status != 0) {
nfs_cancel_async_unlink(data->old_dentry);
nfs_cancel_async_unlink(old_dentry);
return;
}
nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
d_move(data->old_dentry, data->new_dentry);
d_drop(old_dentry);
d_drop(new_dentry);
}
/**
@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
* and only performs the unlink once the last reference to it is put.
*
* The final cleanup is done during dentry_iput.
*
* (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
* could take responsibility for keeping open files referenced. The server
* would also need to ensure that opened-but-deleted files were kept over
* reboots. However, we may not assume a server does so. (RFC 5661
* does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
* use to advertise that it does this; some day we may take advantage of
* it.))
*/
int
nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
if (error)
goto out_dput;
/* populate unlinkdata with the right dname */
error = nfs_copy_dname(sdentry,
(struct nfs_unlinkdata *)dentry->d_fsdata);
if (error) {
nfs_cancel_async_unlink(dentry);
goto out_dput;
}
/* run the rename task, undo unlink if it fails */
task = nfs_async_rename(dir, dir, dentry, sdentry);
if (IS_ERR(task)) {

View file

@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
mempool_free(p, nfs_wdata_mempool);
}
static void nfs_writedata_release(struct nfs_write_data *wdata)
void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
static int nfs_write_rpcsetup(struct nfs_page *req,
static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->dentry->d_inode;
@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
data->cred = req->wb_context->cred;
data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
data->args.stable = NFS_UNSTABLE;
if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
data->args.stable = NFS_DATA_SYNC;
if (!nfs_need_commit(NFS_I(inode)))
data->args.stable = NFS_FILE_SYNC;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
case 0:
break;
case FLUSH_COND_STABLE:
if (nfs_need_commit(NFS_I(inode)))
break;
default:
data->args.stable = NFS_FILE_SYNC;
}
data->res.fattr = &data->fattr;
data->res.count = count;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
}
if (data->lseg &&
(pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
return 0;
/*
 * Start one prepared write through nfs_initiate_write(), using the
 * client of the inode behind the request's open context.
 * Returns whatever nfs_initiate_write() returns.
 */
static int nfs_do_write(struct nfs_write_data *data,
			const struct rpc_call_ops *call_ops,
			int how)
{
	return nfs_initiate_write(data,
				  NFS_CLIENT(data->args.context->dentry->d_inode),
				  call_ops, how);
}
/*
 * Drain @head, issuing every queued nfs_write_data via nfs_do_write().
 * All entries are issued even after a failure; the first non-zero
 * status seen is returned, or 0 when every write was started.
 */
static int nfs_do_multiple_writes(struct list_head *head,
				  const struct rpc_call_ops *call_ops,
				  int how)
{
	int first_err = 0;

	for (;;) {
		struct nfs_write_data *wdata;
		int status;

		if (list_empty(head))
			break;
		wdata = list_entry(head->next, struct nfs_write_data, list);
		list_del_init(&wdata->list);
		status = nfs_do_write(wdata, call_ops, how);
		/* Keep only the first failure; later ones are dropped. */
		if (first_err == 0)
			first_err = status;
	}
	return first_err;
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
* call this on each, which will prepare them to be retried on next
* writeback using standard nfs.
@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
list_add(&data->pages, &list);
data->pagevec[0] = page;
/*
 * Pass the size of this chunk (len), not the full wsize: the last chunk
 * may be shorter than wsize, and nbytes/offset below advance by len.
 */
nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
list_add(&data->list, res);
requests++;
nbytes -= len;
offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg);
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
req_offset(req), desc->pg_count,
IOMODE_RW, GFP_NOFS);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
do {
int ret2;
data = list_entry(list.next, struct nfs_write_data, pages);
list_del_init(&data->pages);
data->pagevec[0] = page;
if (nbytes < wsize)
wsize = nbytes;
ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
wsize, offset, lseg, desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
nbytes -= wsize;
} while (nbytes != 0);
put_lseg(lseg);
desc->pg_lseg = NULL;
desc->pg_rpc_callops = &nfs_write_partial_ops;
return ret;
out_bad:
while (!list_empty(&list)) {
data = list_entry(list.next, struct nfs_write_data, pages);
list_del(&data->pages);
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_write_data, list);
list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
@ -987,14 +986,13 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret;
int ret = 0;
data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
req_offset(req), desc->pg_count,
IOMODE_RW, GFP_NOFS);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
list_add(&data->list, res);
desc->pg_rpc_callops = &nfs_write_full_ops;
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
desc->pg_lseg = NULL;
return ret;
}
/*
 * Build the write request(s) for @desc onto @head.
 * A block size below PAGE_CACHE_SIZE takes the multi-request path;
 * otherwise a single request is built.
 */
int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	if (desc->pg_bsize >= PAGE_CACHE_SIZE)
		return nfs_flush_one(desc, head);
	return nfs_flush_multi(desc, head);
}
static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;
ret = nfs_generic_flush(desc, &head);
if (ret == 0)
ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
desc->pg_ioflags);
return ret;
}
/* Generic write-side page I/O operations; pg_init is left unset. */
static const struct nfs_pageio_ops nfs_pageio_write_ops = {
	.pg_doio = nfs_generic_pg_writepages,
	.pg_test = nfs_generic_pg_test,
};
static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
NFS_SERVER(inode)->wsize, ioflags);
}
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_write_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
size_t wsize = NFS_SERVER(inode)->wsize;
if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
else
nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
if (!pnfs_pageio_init_write(pgio, inode, ioflags))
nfs_pageio_init_write_mds(pgio, inode, ioflags);
}
/*

View file

@ -563,6 +563,9 @@ enum {
NFSPROC4_CLNT_GETDEVICEINFO,
NFSPROC4_CLNT_LAYOUTCOMMIT,
NFSPROC4_CLNT_LAYOUTRETURN,
NFSPROC4_CLNT_SECINFO_NO_NAME,
NFSPROC4_CLNT_TEST_STATEID,
NFSPROC4_CLNT_FREE_STATEID,
};
/* nfs41 types */

View file

@ -16,6 +16,7 @@ struct nfs4_sequence_args;
struct nfs4_sequence_res;
struct nfs_server;
struct nfs4_minor_version_ops;
struct server_scope;
/*
* The nfs_client identifies our client state to the server.
@ -77,12 +78,13 @@ struct nfs_client {
/* The flags used for obtaining the clientid during EXCHANGE_ID */
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* shared session */
struct list_head cl_layouts;
#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
#endif
struct server_scope *server_scope; /* from exchange_id */
};
/*
@ -149,6 +151,7 @@ struct nfs_server {
struct rb_root openowner_id;
struct rb_root lockowner_id;
#endif
struct list_head layouts;
struct list_head delegations;
void (*destroy)(struct nfs_server *);

View file

@ -55,20 +55,28 @@ struct nfs_page {
struct nfs_writeverf wb_verf; /* Commit cookie */
};
struct nfs_pageio_descriptor;
/*
 * Operations table attached to an nfs_pageio_descriptor through pg_ops.
 */
struct nfs_pageio_ops {
/*
 * Optional hook taking the descriptor and a request; the generic read and
 * write tables leave it unset.
 * NOTE(review): presumably invoked when the first request is added to an
 * empty descriptor - confirm against nfs_pageio_add_request().
 */
void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
/* Test for nfs page cache coalescing of two requests. */
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
/* Issue the I/O for the descriptor; returns 0 or an error code. */
int (*pg_doio)(struct nfs_pageio_descriptor *);
};
struct nfs_pageio_descriptor {
struct list_head pg_list;
unsigned long pg_bytes_written;
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
char pg_moreio;
unsigned char pg_moreio : 1,
pg_recoalesce : 1;
struct inode *pg_inode;
int (*pg_doio)(struct nfs_pageio_descriptor *);
const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
const struct rpc_call_ops *pg_rpc_callops;
struct pnfs_layout_segment *pg_lseg;
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
};
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@ -85,7 +93,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
int (*doio)(struct nfs_pageio_descriptor *desc),
const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@ -100,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);
/*
* Lock the page of an asynchronous request without getting a new reference
*/

View file

@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data {
};
struct nfs4_layoutreturn_args {
__u32 layout_type;
struct pnfs_layout_hdr *layout;
struct inode *inode;
nfs4_stateid stateid;
__u32 layout_type;
struct nfs4_sequence_args seq_args;
};
@ -1060,6 +1061,7 @@ struct server_scope {
struct nfs41_exchange_id_res {
struct nfs_client *client;
u32 flags;
struct server_scope *server_scope;
};
struct nfs41_create_session_args {
@ -1083,6 +1085,34 @@ struct nfs41_reclaim_complete_args {
struct nfs41_reclaim_complete_res {
struct nfs4_sequence_res seq_res;
};
#define SECINFO_STYLE_CURRENT_FH 0
#define SECINFO_STYLE_PARENT 1
struct nfs41_secinfo_no_name_args {
int style;
struct nfs4_sequence_args seq_args;
};
struct nfs41_test_stateid_args {
nfs4_stateid *stateid;
struct nfs4_sequence_args seq_args;
};
struct nfs41_test_stateid_res {
unsigned int status;
struct nfs4_sequence_res seq_res;
};
struct nfs41_free_stateid_args {
nfs4_stateid *stateid;
struct nfs4_sequence_args seq_args;
};
struct nfs41_free_stateid_res {
unsigned int status;
struct nfs4_sequence_res seq_res;
};
#endif /* CONFIG_NFS_V4_1 */
struct nfs_page;
@ -1096,6 +1126,7 @@ struct nfs_read_data {
struct rpc_cred *cred;
struct nfs_fattr fattr; /* fattr storage */
struct list_head pages; /* Coalesced read requests */
struct list_head list; /* lists of struct nfs_read_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
@ -1119,6 +1150,7 @@ struct nfs_write_data {
struct nfs_fattr fattr;
struct nfs_writeverf verf;
struct list_head pages; /* Coalesced requests we wish to flush */
struct list_head list; /* lists of struct nfs_write_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */

View file

@ -41,9 +41,6 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <scsi/osd_protocol.h>
#define PNFS_OSD_OSDNAME_MAXSIZE 256
/*
* draft-ietf-nfsv4-minorversion-22
@ -99,12 +96,6 @@ struct pnfs_osd_objid {
#define _DEVID_HI(oid_device_id) \
(unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
static inline int
pnfs_osd_objid_xdr_sz(void)
{
return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
}
enum pnfs_osd_version {
PNFS_OSD_MISSING = 0,
PNFS_OSD_VERSION_1 = 1,
@ -189,8 +180,6 @@ struct pnfs_osd_targetid {
struct nfs4_string oti_scsi_device_id;
};
enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
/* struct netaddr4 {
* // see struct rpcb in RFC1833
* string r_netid<>; // network id
@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr {
struct pnfs_osd_net_addr ota_netaddr;
};
enum {
NETWORK_ID_MAX = 16 / 4,
UNIVERSAL_ADDRESS_MAX = 64 / 4,
PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
};
struct pnfs_osd_deviceaddr {
struct pnfs_osd_targetid oda_targetid;
struct pnfs_osd_targetaddr oda_targetaddr;
@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr {
struct nfs4_string oda_osdname;
};
enum {
ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
PNFS_OSD_DEVICEADDR_MAX =
PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
2 /*oda_lun*/ +
1 + OSD_SYSTEMID_LEN +
1 + ODA_OSDNAME_MAX,
};
/* LAYOUTCOMMIT: layoutupdate */
/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@ -279,7 +253,7 @@ struct pnfs_osd_ioerr {
u32 oer_errno;
};
/* OSD XDR API */
/* OSD XDR Client API */
/* Layout helpers */
/* Layout decoding is done in two parts:
* 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@ -337,8 +311,7 @@ extern int
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
struct pnfs_osd_layoutupdate *lou);
/* osd_ioerror encoding/decoding (layout_return) */
/* Client */
/* osd_ioerror encoding (layout_return) */
extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);

View file

@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#ifdef CONFIG_NFS_V4_1
#ifdef CONFIG_SUNRPC_BACKCHANNEL
struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
return 1;
return 0;
}
#else /* CONFIG_NFS_V4_1 */
#else /* CONFIG_SUNRPC_BACKCHANNEL */
static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
unsigned int min_reqs)
{
@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
static inline void xprt_free_bc_request(struct rpc_rqst *req)
{
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#endif /* _LINUX_SUNRPC_BC_XPRT_H */

View file

@ -227,6 +227,10 @@ void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_destroy_wait_queue(struct rpc_wait_queue *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action);
void rpc_sleep_on_priority(struct rpc_wait_queue *,
struct rpc_task *,
rpc_action action,
int priority);
void rpc_wake_up_queued_task(struct rpc_wait_queue *,
struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *);

View file

@ -92,7 +92,7 @@ struct svc_serv {
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
* connection */
@ -100,7 +100,7 @@ struct svc_serv {
wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
* entries in the svc_cb_list */
struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
/*

View file

@ -22,6 +22,7 @@
#define RPC_MIN_SLOT_TABLE (2U)
#define RPC_DEF_SLOT_TABLE (16U)
#define RPC_MAX_SLOT_TABLE (128U)
#define RPC_MAX_SLOT_TABLE_LIMIT (65536U)
/*
* This describes a timeout strategy
@ -100,18 +101,18 @@ struct rpc_rqst {
ktime_t rq_xtime; /* transmit time stamp */
int rq_ntrans;
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head rq_bc_list; /* Callback service list */
unsigned long rq_bc_pa_state; /* Backchannel prealloc state */
struct list_head rq_bc_pa_list; /* Backchannel prealloc list */
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
#define rq_svec rq_snd_buf.head
#define rq_slen rq_snd_buf.len
struct rpc_xprt_ops {
void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
int (*reserve_xprt)(struct rpc_task *task);
int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*rpcbind)(struct rpc_task *task);
void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
@ -164,12 +165,12 @@ struct rpc_xprt {
struct rpc_wait_queue binding; /* requests waiting on rpcbind */
struct rpc_wait_queue sending; /* requests waiting to send */
struct rpc_wait_queue resend; /* requests waiting to resend */
struct rpc_wait_queue pending; /* requests in flight */
struct rpc_wait_queue backlog; /* waiting for slot */
struct list_head free; /* free slots */
struct rpc_rqst * slot; /* slot table storage */
unsigned int max_reqs; /* total slots */
unsigned int max_reqs; /* max number of slots */
unsigned int min_reqs; /* min number of slots */
atomic_t num_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
resvport : 1; /* use a reserved port */
@ -200,7 +201,7 @@ struct rpc_xprt {
u32 xid; /* Next XID value to use */
struct rpc_task * snd_task; /* Task blocked in send */
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct svc_serv *bc_serv; /* The RPC service which will */
/* process the callback */
unsigned int bc_alloc_count; /* Total number of preallocs */
@ -208,7 +209,7 @@ struct rpc_xprt {
* items */
struct list_head bc_pa_list; /* List of preallocated
* backchannel rpc_rqst's */
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct list_head recv;
struct {
@ -228,15 +229,15 @@ struct rpc_xprt {
const char *address_strings[RPC_DISPLAY_MAX];
};
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Backchannel flags
*/
#define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */
/* buffer in use */
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static inline int bc_prealloc(struct rpc_rqst *req)
{
return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@ -246,7 +247,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
{
return 0;
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct xprt_create {
int ident; /* XPRT_TRANSPORT identifier */
@ -271,8 +272,8 @@ struct xprt_class {
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_prepare_transmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task);
void xprt_end_transmit(struct rpc_task *task);
@ -282,7 +283,9 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_release(struct rpc_task *task);
struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
void xprt_put(struct rpc_xprt *xprt);
struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req);
struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_req);
void xprt_free(struct rpc_xprt *);
static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@ -321,7 +324,6 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
#define XPRT_CLOSING (6)
#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_INITIALIZED (9)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{

View file

@ -4,6 +4,10 @@ config SUNRPC
config SUNRPC_GSS
tristate
config SUNRPC_BACKCHANNEL
bool
depends on SUNRPC
config SUNRPC_XPRT_RDMA
tristate
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL

View file

@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o

View file

@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
#if defined(CONFIG_NFS_V4_1)
/*
* Helper routines that track the number of preallocation elements
* on the transport.
@ -174,7 +172,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
dprintk("RPC: setup backchannel transport failed\n");
return -1;
}
EXPORT_SYMBOL(xprt_setup_backchannel);
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/*
* Destroys the backchannel preallocated structures.
@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
EXPORT_SYMBOL(xprt_destroy_backchannel);
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
/*
* One or more rpc_rqst structure have been preallocated during the
@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req)
spin_unlock_bh(&xprt->bc_pa_lock);
}
#endif /* CONFIG_NFS_V4_1 */

View file

@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* reply over an existing open connection previously established by the client.
*/
#if defined(CONFIG_NFS_V4_1)
#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req)
return ret;
}
#endif /* CONFIG_NFS_V4_1 */

View file

@ -64,9 +64,9 @@ static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static void call_bc_transmit(struct rpc_task *task);
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static void call_status(struct rpc_task *task);
static void call_transmit_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
@ -715,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
}
EXPORT_SYMBOL_GPL(rpc_call_async);
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
@ -758,7 +758,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
void
rpc_call_start(struct rpc_task *task)
@ -1361,7 +1361,7 @@ call_transmit_status(struct rpc_task *task)
}
}
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* 5b. Send the backchannel RPC reply. On error, drop the reply. In
* addition, disconnect on connectivity errors.
@ -1425,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task)
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* 6. Sort out the RPC call status
@ -1550,8 +1550,7 @@ call_decode(struct rpc_task *task)
kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
__be32 *p;
dprintk("RPC: %5u call_decode (status %d)\n",
task->tk_pid, task->tk_status);
dprint_status(task);
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty)

View file

@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
/*
* Add new request to a priority queue.
*/
static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
struct rpc_task *task,
unsigned char queue_priority)
{
struct list_head *q;
struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
q = &queue->tasks[task->tk_priority];
if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue_priority];
if (unlikely(queue_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
unsigned char queue_priority)
{
BUG_ON (RPC_IS_QUEUED(task));
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task);
__rpc_add_wait_queue_priority(queue, task, queue_priority);
else if (RPC_IS_SWAPPER(task))
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task)
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action)
static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
rpc_action action,
unsigned char queue_priority)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
__rpc_add_wait_queue(q, task);
__rpc_add_wait_queue(q, task, queue_priority);
BUG_ON(task->tk_callback != NULL);
task->tk_callback = action;
@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
__rpc_sleep_on(q, task, action);
__rpc_sleep_on_priority(q, task, action, task->tk_priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
/*
 * Put @task to sleep on @q at a caller-selected priority level rather than
 * at the task's own tk_priority.  @priority is rebased against
 * RPC_PRIORITY_LOW before it is handed to __rpc_sleep_on_priority().
 */
void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
		rpc_action action, int priority)
{
	int queue_priority = priority - RPC_PRIORITY_LOW;

	/* Queuing a task that has not been activated is a programming error. */
	BUG_ON(!RPC_IS_ACTIVATED(task));

	/* Wait-queue manipulation is serialised by the queue lock. */
	spin_lock_bh(&q->lock);
	__rpc_sleep_on_priority(q, task, action, queue_priority);
	spin_unlock_bh(&q->lock);
}
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
* @queue: wait queue

View file

@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp)
}
}
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Process a backchannel RPC request that arrived over an existing
* outbound connection
@ -1300,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
return 0;
}
}
EXPORT_SYMBOL(bc_svc_process);
#endif /* CONFIG_NFS_V4_1 */
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* Return (transport-specific) limit on the rpc payload.

View file

@ -68,12 +68,12 @@ static void svc_sock_free(struct svc_xprt *);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
static void svc_bc_sock_free(struct svc_xprt *xprt);
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
@ -1243,7 +1243,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
@ -1284,7 +1284,7 @@ static void svc_cleanup_bc_xprt_sock(void)
{
svc_unreg_xprt_class(&svc_tcp_bc_class);
}
#else /* CONFIG_NFS_V4_1 */
#else /* CONFIG_SUNRPC_BACKCHANNEL */
static void svc_init_bc_xprt_sock(void)
{
}
@ -1292,7 +1292,7 @@ static void svc_init_bc_xprt_sock(void)
static void svc_cleanup_bc_xprt_sock(void)
{
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
@ -1623,7 +1623,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
kfree(svsk);
}
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Create a back channel svc_xprt which shares the fore channel socket.
*/
@ -1662,4 +1662,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt)
if (xprt)
kfree(container_of(xprt, struct svc_sock, sk_xprt));
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */

View file

@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
kaddr[buf->page_base + len] = '\0';
kunmap_atomic(kaddr, KM_USER0);
}
EXPORT_SYMBOL(xdr_terminate_string);
EXPORT_SYMBOL_GPL(xdr_terminate_string);
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,

View file

@ -62,6 +62,7 @@
/*
* Local functions
*/
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
* transport connects from colliding with writes. No congestion control
* is provided.
*/
int xprt_reserve_xprt(struct rpc_task *task)
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task)
goto out_sleep;
}
xprt->snd_task = task;
req->rq_bytes_sent = 0;
req->rq_ntrans++;
if (req != NULL) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
return 1;
@ -212,10 +215,13 @@ int xprt_reserve_xprt(struct rpc_task *task)
task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req->rq_ntrans)
rpc_sleep_on(&xprt->resend, task, NULL);
if (req == NULL)
priority = RPC_PRIORITY_LOW;
else if (!req->rq_ntrans)
priority = RPC_PRIORITY_NORMAL;
else
rpc_sleep_on(&xprt->sending, task, NULL);
priority = RPC_PRIORITY_HIGH;
rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
* integrated into the decision of whether a request is allowed to be
* woken up and given access to the transport.
*/
int xprt_reserve_xprt_cong(struct rpc_task *task)
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_sleep;
}
if (req == NULL) {
xprt->snd_task = task;
return 1;
}
if (__xprt_get_cong(xprt, task)) {
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
req->rq_bytes_sent = 0;
req->rq_ntrans++;
return 1;
}
xprt_clear_locked(xprt);
@ -262,10 +270,13 @@ int xprt_reserve_xprt_cong(struct rpc_task *task)
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
rpc_sleep_on(&xprt->resend, task, NULL);
if (req == NULL)
priority = RPC_PRIORITY_LOW;
else if (!req->rq_ntrans)
priority = RPC_PRIORITY_NORMAL;
else
rpc_sleep_on(&xprt->sending, task, NULL);
priority = RPC_PRIORITY_HIGH;
rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
int retval;
spin_lock_bh(&xprt->transport_lock);
retval = xprt->ops->reserve_xprt(task);
retval = xprt->ops->reserve_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return retval;
}
@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
task = rpc_wake_up_next(&xprt->sending);
if (!task)
goto out_unlock;
}
task = rpc_wake_up_next(&xprt->sending);
if (task == NULL)
goto out_unlock;
req = task->tk_rqstp;
xprt->snd_task = task;
@ -310,24 +318,25 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
{
struct rpc_task *task;
struct rpc_rqst *req;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
if (RPCXPRT_CONGESTED(xprt))
goto out_unlock;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
task = rpc_wake_up_next(&xprt->sending);
if (!task)
goto out_unlock;
task = rpc_wake_up_next(&xprt->sending);
if (task == NULL)
goto out_unlock;
req = task->tk_rqstp;
if (req == NULL) {
xprt->snd_task = task;
return;
}
if (__xprt_get_cong(xprt, task)) {
struct rpc_rqst *req = task->tk_rqstp;
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
req->rq_bytes_sent = 0;
req->rq_ntrans++;
return;
}
out_unlock:
@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task)
err = req->rq_reply_bytes_recvd;
goto out_unlock;
}
if (!xprt->ops->reserve_xprt(task))
if (!xprt->ops->reserve_xprt(xprt, task))
err = -EAGAIN;
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
}
/*
 * Try to grow the transport's slot count by one dynamically allocated
 * request.
 *
 * Returns the zeroed rpc_rqst on success, ERR_PTR(-EAGAIN) when
 * xprt->num_reqs already equals xprt->max_reqs, or ERR_PTR(-ENOMEM) when
 * the allocation fails (the slot count is rolled back before returning).
 */
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
{
	struct rpc_rqst *slot;

	/* Account for the new slot first; bail out if we are at the limit. */
	if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
		return ERR_PTR(-EAGAIN);

	slot = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
	if (slot == NULL) {
		/* Undo the accounting done above. */
		atomic_dec(&xprt->num_reqs);
		return ERR_PTR(-ENOMEM);
	}

	return slot;
}
/*
 * Shrink the slot count by one and free @req, unless xprt->num_reqs already
 * equals xprt->min_reqs.
 *
 * Returns true when @req was freed, false when it was left untouched and
 * the caller still owns it.
 */
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	if (!atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs))
		return false;

	kfree(req);
	return true;
}
static void xprt_alloc_slot(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req;
task->tk_status = 0;
if (task->tk_rqstp)
return;
if (!list_empty(&xprt->free)) {
struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
list_del_init(&req->rq_list);
task->tk_rqstp = req;
xprt_request_init(task, xprt);
return;
req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
list_del(&req->rq_list);
goto out_init_req;
}
req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
if (!IS_ERR(req))
goto out_init_req;
switch (PTR_ERR(req)) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
dprintk("RPC: dynamic allocation of request slot "
"failed! Retrying\n");
break;
case -EAGAIN:
rpc_sleep_on(&xprt->backlog, task, NULL);
dprintk("RPC: waiting for request slot\n");
}
dprintk("RPC: waiting for request slot\n");
task->tk_status = -EAGAIN;
task->tk_timeout = 0;
rpc_sleep_on(&xprt->backlog, task, NULL);
return;
out_init_req:
task->tk_status = 0;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
}
static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
if (xprt_dynamic_free_slot(xprt, req))
return;
memset(req, 0, sizeof(*req)); /* mark unused */
spin_lock(&xprt->reserve_lock);
@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
spin_unlock(&xprt->reserve_lock);
}
struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
/* Drain xprt->free: unlink and kfree() every request slot still on it. */
static void xprt_free_all_slots(struct rpc_xprt *xprt)
{
	struct list_head *free_list = &xprt->free;

	while (!list_empty(free_list)) {
		struct rpc_rqst *slot;

		slot = list_first_entry(free_list, struct rpc_rqst, rq_list);
		list_del(&slot->rq_list);
		kfree(slot);
	}
}
struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_alloc)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
int i;
xprt = kzalloc(size, GFP_KERNEL);
if (xprt == NULL)
goto out;
atomic_set(&xprt->count, 1);
xprt->max_reqs = max_req;
xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
if (xprt->slot == NULL)
xprt_init(xprt, net);
for (i = 0; i < num_prealloc; i++) {
req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
if (!req)
break;
list_add(&req->rq_list, &xprt->free);
}
if (i < num_prealloc)
goto out_free;
if (max_alloc > num_prealloc)
xprt->max_reqs = max_alloc;
else
xprt->max_reqs = num_prealloc;
xprt->min_reqs = num_prealloc;
atomic_set(&xprt->num_reqs, num_prealloc);
xprt->xprt_net = get_net(net);
return xprt;
out_free:
kfree(xprt);
xprt_free(xprt);
out:
return NULL;
}
@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
void xprt_free(struct rpc_xprt *xprt)
{
put_net(xprt->xprt_net);
kfree(xprt->slot);
xprt_free_all_slots(xprt);
kfree(xprt);
}
EXPORT_SYMBOL_GPL(xprt_free);
@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
task->tk_status = -EIO;
task->tk_status = 0;
if (task->tk_rqstp != NULL)
return;
/* Note: grabbing the xprt_lock_write() here is not strictly needed,
* but ensures that we throttle new slot allocation if the transport
* is congested (e.g. if reconnecting or if we're out of socket
* write buffer space).
*/
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (!xprt_lock_write(xprt, task))
return;
spin_lock(&xprt->reserve_lock);
xprt_alloc_slot(task);
spin_unlock(&xprt->reserve_lock);
xprt_release_write(xprt, task);
}
static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
{
struct rpc_rqst *req = task->tk_rqstp;
INIT_LIST_HEAD(&req->rq_list);
req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_task = task;
req->rq_xprt = xprt;
@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task)
xprt_free_bc_request(req);
}
/*
 * xprt_init - set up the generic part of a newly allocated rpc_xprt
 * @xprt: transport to initialise
 * @net: network namespace stored in xprt->xprt_net
 *
 * Sets the reference count to one, initialises the locks, lists and wait
 * queues, seeds the congestion window and XID, and records @net via
 * get_net().
 */
static void xprt_init(struct rpc_xprt *xprt, struct net *net)
{
/* The caller holds the initial reference. */
atomic_set(&xprt->count, 1);
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
/* Free-slot list and receive list both start out empty. */
INIT_LIST_HEAD(&xprt->free);
INIT_LIST_HEAD(&xprt->recv);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* Backchannel preallocation state exists only with backchannel support. */
spin_lock_init(&xprt->bc_pa_lock);
INIT_LIST_HEAD(&xprt->bc_pa_list);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
xprt->bind_index = 0;
/* binding and pending are plain queues; sending and backlog are priority queues. */
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
xprt_init_xid(xprt);
/* Paired with the put_net(xprt->xprt_net) in xprt_free(). */
xprt->xprt_net = get_net(net);
}
/**
* xprt_create_transport - create an RPC transport
* @args: rpc transport creation arguments
@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task)
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
struct xprt_class *t;
spin_lock(&xprt_list_lock);
@ -1100,46 +1213,17 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
if (IS_ERR(xprt)) {
dprintk("RPC: xprt_create_transport: failed, %ld\n",
-PTR_ERR(xprt));
return xprt;
goto out;
}
if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
/* ->setup returned a pre-initialized xprt: */
return xprt;
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
INIT_LIST_HEAD(&xprt->free);
INIT_LIST_HEAD(&xprt->recv);
#if defined(CONFIG_NFS_V4_1)
spin_lock_init(&xprt->bc_pa_lock);
INIT_LIST_HEAD(&xprt->bc_pa_list);
#endif /* CONFIG_NFS_V4_1 */
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
setup_timer(&xprt->timer, xprt_init_autodisconnect,
(unsigned long)xprt);
else
init_timer(&xprt->timer);
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
xprt->bind_index = 0;
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
rpc_init_wait_queue(&xprt->sending, "xprt_sending");
rpc_init_wait_queue(&xprt->resend, "xprt_resend");
rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
/* initialize free list */
for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
list_add(&req->rq_list, &xprt->free);
xprt_init_xid(xprt);
dprintk("RPC: created transport %p with %u slots\n", xprt,
xprt->max_reqs);
out:
return xprt;
}
@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt)
rpc_destroy_wait_queue(&xprt->binding);
rpc_destroy_wait_queue(&xprt->pending);
rpc_destroy_wait_queue(&xprt->sending);
rpc_destroy_wait_queue(&xprt->resend);
rpc_destroy_wait_queue(&xprt->backlog);
cancel_work_sync(&xprt->task_cleanup);
/*

View file

@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args)
}
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
xprt_rdma_slot_table_entries,
xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task)
}
static int
xprt_rdma_reserve_xprt(struct rpc_task *task)
xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task)
BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
}
xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
return xprt_reserve_xprt_cong(task);
return xprt_reserve_xprt_cong(xprt, task);
}
/*

View file

@ -109,7 +109,7 @@ struct rpcrdma_ep {
*/
/* temporary static scatter/gather max */
#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */
#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
#define MAX_RPCRDMAHDR (\
/* max supported RPC/RDMA header */ \

View file

@ -37,7 +37,7 @@
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>
#ifdef CONFIG_NFS_V4_1
#ifdef CONFIG_SUNRPC_BACKCHANNEL
#include <linux/sunrpc/bc_xprt.h>
#endif
@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt);
* xprtsock tunables
*/
unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
@ -103,6 +105,15 @@ static ctl_table xs_tunables_table[] = {
.extra1 = &min_slot_table_size,
.extra2 = &max_slot_table_size
},
{
.procname = "tcp_max_slot_table_entries",
.data = &xprt_max_tcp_slot_table_entries,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_slot_table_size,
.extra2 = &max_tcp_slot_table_limit
},
{
.procname = "min_resvport",
.data = &xprt_min_resvport,
@ -755,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
if (task == NULL)
goto out_release;
req = task->tk_rqstp;
if (req == NULL)
goto out_release;
if (req->rq_bytes_sent == 0)
goto out_release;
if (req->rq_bytes_sent == req->rq_snd_buf.len)
@ -1236,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
return 0;
}
#if defined(CONFIG_NFS_V4_1)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Obtains an rpc_rqst previously allocated and invokes the common
* tcp read code to read the data. The result is placed in the callback
@ -1299,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
{
return xs_tcp_read_reply(xprt, desc);
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* Read data off the transport. This can be either an RPC_CALL or an
@ -2489,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
}
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
unsigned int slot_table_size)
unsigned int slot_table_size,
unsigned int max_slot_table_size)
{
struct rpc_xprt *xprt;
struct sock_xprt *new;
@ -2499,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
return ERR_PTR(-EBADF);
}
xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
max_slot_table_size);
if (xprt == NULL) {
dprintk("RPC: xs_setup_xprt: couldn't allocate "
"rpc_xprt\n");
@ -2541,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
struct rpc_xprt *xprt;
struct rpc_xprt *ret;
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_max_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@ -2605,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
struct sock_xprt *transport;
struct rpc_xprt *ret;
xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
xprt_udp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@ -2681,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
struct sock_xprt *transport;
struct rpc_xprt *ret;
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_max_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@ -2760,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
*/
return args->bc_xprt->xpt_bc_xprt;
}
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@ -2947,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = {
#define param_check_slot_table_size(name, p) \
__param_check(name, p, unsigned int);
/*
 * Setter for the tcp_max_slot_table_entries module parameter.  Delegates to
 * param_set_uint_minmax() with RPC_MIN_SLOT_TABLE as the lower bound and
 * RPC_MAX_SLOT_TABLE_LIMIT as the upper bound, and returns its result.
 */
static int param_set_max_slot_table_size(const char *val,
					 const struct kernel_param *kp)
{
	const unsigned int lower = RPC_MIN_SLOT_TABLE;
	const unsigned int upper = RPC_MAX_SLOT_TABLE_LIMIT;

	return param_set_uint_minmax(val, kp, lower, upper);
}
/*
 * kernel_param_ops for the "max_slot_table_size" parameter type: reads go
 * through the generic unsigned-int getter, writes through the range-checked
 * setter above.
 */
static struct kernel_param_ops param_ops_max_slot_table_size = {
	.get = param_get_uint,
	.set = param_set_max_slot_table_size,
};
#define param_check_max_slot_table_size(name, p) \
__param_check(name, p, unsigned int);
module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
slot_table_size, 0644);
module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
max_slot_table_size, 0644);
module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
slot_table_size, 0644);