From 9a1b6bf818e74bb7aabaecb59492b739f2f4d742 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 12:06:12 -0400 Subject: LOCKD: Don't call utsname()->nodename from nlmclnt_setlockargs Firstly, nlmclnt_setlockargs can be called from a reclaimer thread, in which case we're in entirely the wrong namespace. Secondly, commit 8aac62706adaaf0fab02c4327761561c8bda9448 (move exit_task_namespaces() outside of exit_notify()) now means that exit_task_work() is called after exit_task_namespaces(), which triggers an Oops when we're freeing up the locks. Fix this by ensuring that we initialise the nlm_host's rpc_client at mount time, so that the cl_nodename field is initialised to the value of utsname()->nodename that the net namespace uses. Then replace the lockd callers of utsname()->nodename. Signed-off-by: Trond Myklebust Cc: Toralf Förster Cc: Oleg Nesterov Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.10.x --- fs/lockd/clntlock.c | 13 +++++++++---- fs/lockd/clntproc.c | 5 +++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 01bfe7662751..41e491b8e5d7 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) nlm_init->protocol, nlm_version, nlm_init->hostname, nlm_init->noresvport, nlm_init->net); - if (host == NULL) { - lockd_down(nlm_init->net); - return ERR_PTR(-ENOLCK); - } + if (host == NULL) + goto out_nohost; + if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) + goto out_nobind; return host; +out_nobind: + nlmclnt_release_host(host); +out_nohost: + lockd_down(nlm_init->net); + return ERR_PTR(-ENOLCK); } EXPORT_SYMBOL_GPL(nlmclnt_init); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 9760ecb9b60f..acd394716349 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_args *argp = &req->a_args; struct nlm_lock *lock = &argp->lock; + char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); - lock->caller = utsname()->nodename; + lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", (unsigned int)fl->fl_u.nfs_fl.owner->pid, - utsname()->nodename); + nodename); lock->svid = fl->fl_u.nfs_fl.owner->pid; lock->fl.fl_start = fl->fl_start; lock->fl.fl_end = fl->fl_end; -- cgit v1.2.3-55-g7522 From 00326ed6442c66021cd4b5e19e80f3e2027d5d42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 14:10:43 -0400 Subject: SUNRPC: Don't auto-disconnect from the local rpcbind socket There is no need for the kernel to time out the AF_LOCAL connection to the rpcbind socket, and doing so is problematic because when it is time to reconnect, our process may no longer be using the same mount namespace. Reported-by: Nix Signed-off-by: Trond Myklebust Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df764dc330c..b0f723227157 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -238,6 +238,14 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + /* + * We turn off the idle timeout to prevent the kernel + * from automatically disconnecting the socket. + * Otherwise, we'd have to cache the mount namespace + * of the caller and somehow pass that to the socket + * reconnect code. + */ + .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, }; struct rpc_clnt *clnt, *clnt4; int result = 0; -- cgit v1.2.3-55-g7522 From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 +++++++++++++++++++++++++++------------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d870353674a..1821445708d6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a704e374..ecbc4e3d83ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f723227157..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** -- cgit v1.2.3-55-g7522 From f8806c843f88a6b7d657cf24c3682bc2efda6fdb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 13:26:31 -0400 Subject: NFS: Fix writeback performance issue on cache invalidation If a cache invalidation is triggered, and we happen to have a lot of writebacks cached at the time, then the call to invalidate_inode_pages2() will end up calling ->launder_page() on each and every dirty page in order to sync its contents to disk, thus defeating write coalescing. The following patch ensures that we try to sync the inode to disk before calling invalidate_inode_pages2() so that we do the writeback as efficiently as possible. Reported-by: William Dauchy Reported-by: Pascal Bouchareine Signed-off-by: Trond Myklebust Tested-by: William Dauchy Reviewed-by: Jeff Layton --- fs/nfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index af6e806044d7..3ea4f641effc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -963,9 +963,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - + int ret; + if (mapping->nrpages != 0) { - int ret = invalidate_inode_pages2(mapping); + if (S_ISREG(inode->i_mode)) { + ret = nfs_sync_mapping(mapping); + if (ret < 0) + return ret; + } + ret = invalidate_inode_pages2(mapping); if (ret < 0) return ret; } -- cgit v1.2.3-55-g7522 From e890db0104826742818cbfb8fdb3000a38a9b97c Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 31 Jul 2013 10:01:41 -0400 Subject: NFSv4: Fix the sync mount option for nfs4 mounts The sync mount option stopped working for NFSv4 mounts after commit c02d7adf8c5429727a98bad1d039bccad4c61c50 (NFSv4: Replace nfs4_path_walk() with FS path lookup in a private namespace). If MS_SYNCHRONOUS is set in the super_block that we're cloning from, then it should be set in the new super_block as well. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 71fdc0dfa0d2..f6db66d8f647 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2478,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, if (server->flags & NFS_MOUNT_NOAC) sb_mntdata.mntflags |= MS_SYNCHRONOUS; + if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) + if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { -- cgit v1.2.3-55-g7522 From eddffa40841c0249678ee5551839e369baa97314 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Aug 2013 12:17:19 -0400 Subject: NFS: Remove unnecessary call to nfs_setsecurity in nfs_fhget() We only need to call it on the creation of the inode. Reported-by: Julia Lawall Cc: Steve Dickson Cc: Dave Quigley Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3ea4f641effc..941246f2b43d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -463,7 +463,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - nfs_setsecurity(inode, fattr, label); dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), -- cgit v1.2.3-55-g7522 From b72888cb0ba63b2dfc6c8d3cd78a7fea584bebc6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Aug 2013 20:38:07 -0400 Subject: NFSv4: Fix up nfs4_proc_lookup_mountpoint Currently, we do not check the return value of client = rpc_clone_client(), nor do we shut down the resulting cloned rpc_clnt in the case where a NFS4ERR_WRONGSEC has caused nfs4_proc_lookup_common() to replace the original value of 'client' (causing a memory leak). Fix both issues and simplify the code by moving the call to rpc_clone_client() until after nfs4_proc_lookup_common() has done its business. Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf11799297c4..108a774095f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3071,15 +3071,13 @@ struct rpc_clnt * nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_clnt *client = NFS_CLIENT(dir); int status; - struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); - if (status < 0) { - rpc_shutdown_client(client); + if (status < 0) return ERR_PTR(status); - } - return client; + return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; } static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) -- cgit v1.2.3-55-g7522