From fe238e601d2519f259103ab65caea3b077ed7b39 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 1 Apr 2016 13:45:09 -0400 Subject: NFS: Save struct inode * inside nfs_commit_info to clarify usage of i_lock Commit ea2cf22 created nfs_commit_info and saved &inode->i_lock inside this NFS specific structure. This obscures the usage of i_lock. Instead, save struct inode * so later it's clear the spinlock taken is i_lock. Should be no functional change. Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/nfs_xdr.h') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906cf13e..cb9982d8f38f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1431,7 +1431,7 @@ struct nfs_commit_completion_ops { }; struct nfs_commit_info { - spinlock_t *lock; /* inode->i_lock */ + struct inode *inode; /* Needed for inode->i_lock */ struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; struct nfs_direct_req *dreq; /* O_DIRECT request */ -- cgit v1.2.3-55-g7522 From 2e72448b07dc3ff1b7593e9bfff91db182262857 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 21 May 2013 16:53:03 -0400 Subject: NFS: Add COPY nfs operation This adds the copy_range file_ops function pointer used by the sys_copy_range() function call. This patch only implements sync copies, so if an async copy happens we decode the stateid and ignore it. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 105 +++++++++++++++++++++++++++++++++ fs/nfs/nfs42xdr.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4file.c | 23 ++++++++ fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 26 +++++++++ 9 files changed, 305 insertions(+) (limited to 'include/linux/nfs_xdr.h') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index b587ccd31083..b6cd15314bab 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -13,6 +13,7 @@ /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); +ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index dff83460e5a6..579ee20e4120 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -126,6 +126,111 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } +static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, + struct nfs_lock_context *src_lock, + struct file *dst, loff_t pos_dst, + struct nfs_lock_context *dst_lock, + size_t count) +{ + struct nfs42_copy_args args = { + .src_fh = NFS_FH(file_inode(src)), + .src_pos = pos_src, + .dst_fh = NFS_FH(file_inode(dst)), + .dst_pos = pos_dst, + .count = count, + }; + struct nfs42_copy_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct inode *dst_inode = file_inode(dst); + struct nfs_server *server = NFS_SERVER(dst_inode); + int status; + + status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + src_lock, FMODE_READ); + if (status) + return status; + + status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + dst_lock, FMODE_WRITE); + if (status) + return status; + + status = nfs4_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, 0); + if (status == -ENOTSUPP) + server->caps &= ~NFS_CAP_COPY; + if (status) + return status; + + if (res.write_res.verifier.committed != NFS_FILE_SYNC) { + status = nfs_commit_file(dst, &res.write_res.verifier.verifier); + if (status) + return status; + } + + truncate_pagecache_range(dst_inode, pos_dst, + pos_dst + res.write_res.count); + + return res.write_res.count; +} + +ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, + struct file *dst, loff_t pos_dst, + size_t count) +{ + struct nfs_server *server = NFS_SERVER(file_inode(dst)); + struct nfs_lock_context *src_lock; + struct nfs_lock_context *dst_lock; + struct nfs4_exception src_exception = { }; + struct nfs4_exception dst_exception = { }; + ssize_t err, err2; + + if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) + return -EOPNOTSUPP; + + src_lock = nfs_get_lock_context(nfs_file_open_context(src)); + if (IS_ERR(src_lock)) + return PTR_ERR(src_lock); + + src_exception.inode = file_inode(src); + src_exception.state = src_lock->open_context->state; + + dst_lock = nfs_get_lock_context(nfs_file_open_context(dst)); + if (IS_ERR(dst_lock)) { + err = PTR_ERR(dst_lock); + goto out_put_src_lock; + } + + dst_exception.inode = file_inode(dst); + dst_exception.state = dst_lock->open_context->state; + + do { + mutex_lock(&file_inode(dst)->i_mutex); + err = _nfs42_proc_copy(src, pos_src, src_lock, + dst, pos_dst, dst_lock, count); + mutex_unlock(&file_inode(dst)->i_mutex); + + if (err == -ENOTSUPP) { + err = -EOPNOTSUPP; + break; + } + + err2 = nfs4_handle_exception(server, err, &src_exception); + err = nfs4_handle_exception(server, err, &dst_exception); + if (!err) + err = err2; + } while (src_exception.retry || dst_exception.retry); + + nfs_put_lock_context(dst_lock); +out_put_src_lock: + nfs_put_lock_context(src_lock); + return err; +} + static loff_t _nfs42_proc_llseek(struct file *filep, struct nfs_lock_context *lock, loff_t offset, int whence) { diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0ca482a51e53..6dc6f2aea0d6 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -9,9 +9,22 @@ #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) +#define NFS42_WRITE_RES_SIZE (1 /* wr_callback_id size */ +\ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 2 /* wr_count */ + \ + 1 /* wr_committed */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) #define encode_allocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_copy_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 2 + 2 + 2 + 1 + 1 + 1) +#define decode_copy_maxsz (op_decode_hdr_maxsz + \ + NFS42_WRITE_RES_SIZE + \ + 1 /* cr_consecutive */ + \ + 1 /* cr_synchronous */) #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) @@ -49,6 +62,16 @@ decode_putfh_maxsz + \ decode_allocate_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_copy_maxsz) +#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_copy_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_deallocate_maxsz + \ @@ -102,6 +125,23 @@ static void encode_allocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_copy(struct xdr_stream *xdr, + struct nfs42_copy_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + + encode_uint64(xdr, args->src_pos); + encode_uint64(xdr, args->dst_pos); + encode_uint64(xdr, args->count); + + encode_uint32(xdr, 1); /* consecutive = true */ + encode_uint32(xdr, 1); /* synchronous = true */ + encode_uint32(xdr, 0); /* src server list */ +} + static void encode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args, struct compound_hdr *hdr) @@ -181,6 +221,26 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode COPY request + */ +static void nfs4_xdr_enc_copy(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_copy_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_copy(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode DEALLOCATE request */ @@ -266,6 +326,62 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) return decode_op_hdr(xdr, OP_ALLOCATE); } +static int decode_write_response(struct xdr_stream *xdr, + struct nfs42_write_res *res) +{ + __be32 *p; + int stateids; + + p = xdr_inline_decode(xdr, 4 + 8 + 4); + if (unlikely(!p)) + goto out_overflow; + + stateids = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &res->count); + res->verifier.committed = be32_to_cpup(p); + return decode_verifier(xdr, &res->verifier.verifier); + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_copy_requirements(struct xdr_stream *xdr, + struct nfs42_copy_res *res) { + __be32 *p; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(!p)) + goto out_overflow; + + res->consecutive = be32_to_cpup(p++); + res->synchronous = be32_to_cpup(p++); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_COPY); + if (status == NFS4ERR_OFFLOAD_NO_REQS) { + status = decode_copy_requirements(xdr, res); + if (status) + return status; + return NFS4ERR_OFFLOAD_NO_REQS; + } else if (status) + return status; + + status = decode_write_response(xdr, &res->write_res); + if (status) + return status; + + return decode_copy_requirements(xdr, res); +} + static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_DEALLOCATE); @@ -330,6 +446,36 @@ out: return status; } +/* + * Decode COPY response + */ +static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_copy_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_copy(xdr, res); +out: + return status; +} + /* * Decode DEALLOCATE request */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d0390516467c..014b0e41ace5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -129,6 +129,28 @@ nfs4_file_flush(struct file *file, fl_owner_t id) } #ifdef CONFIG_NFS_V4_2 +static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) +{ + struct inode *in_inode = file_inode(file_in); + struct inode *out_inode = file_inode(file_out); + int ret; + + if (in_inode == out_inode) + return -EINVAL; + + /* flush any pending writes */ + ret = nfs_sync_inode(in_inode); + if (ret) + return ret; + ret = nfs_sync_inode(out_inode); + if (ret) + return ret; + + return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); +} + static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) { loff_t ret; @@ -243,6 +265,7 @@ const struct file_operations nfs4_file_operations = { .check_flags = nfs_check_flags, .setlease = simple_nosetlease, #ifdef CONFIG_NFS_V4_2 + .copy_file_range = nfs4_copy_file_range, .llseek = nfs4_file_llseek, .fallocate = nfs42_fallocate, .clone_file_range = nfs42_clone_file_range, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bc2676c95e1b..4e83385ea6a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8797,6 +8797,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE + | NFS_CAP_COPY | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 88474a4fc669..d1c96fc62c51 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7515,6 +7515,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), PROC(CLONE, enc_clone, dec_clone), + PROC(COPY, enc_copy, dec_copy), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 011433478a14..722509482e1a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -504,6 +504,7 @@ enum { NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, + NFSPROC4_CLNT_COPY, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7fcc13c8cf1f..14a762d2734d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,5 +246,6 @@ struct nfs_server { #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) #define NFS_CAP_CLONE (1U << 23) +#define NFS_CAP_COPY (1U << 24) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cb9982d8f38f..e70ed54dad94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1343,6 +1343,32 @@ struct nfs42_falloc_res { const struct nfs_server *falloc_server; }; +struct nfs42_copy_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *src_fh; + nfs4_stateid src_stateid; + u64 src_pos; + + struct nfs_fh *dst_fh; + nfs4_stateid dst_stateid; + u64 dst_pos; + + u64 count; +}; + +struct nfs42_write_res { + u64 count; + struct nfs_writeverf verifier; +}; + +struct nfs42_copy_res { + struct nfs4_sequence_res seq_res; + struct nfs42_write_res write_res; + bool consecutive; + bool synchronous; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; -- cgit v1.2.3-55-g7522 From 183d9e7b112aaed0d19c16ffcf0f8c3a86dc71e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 May 2016 12:28:47 -0400 Subject: pnfs: rework LAYOUTGET retry handling There are several problems in the way a stateid is selected for a LAYOUTGET operation: We pick a stateid to use in the RPC prepare op, but that makes it difficult to serialize LAYOUTGETs that use the open stateid. That serialization is done in pnfs_update_layout, which occurs well before the rpc_prepare operation. Between those two events, the i_lock is dropped and reacquired. pnfs_update_layout can find that the list has lsegs in it and not do any serialization, but then later pnfs_choose_layoutget_stateid ends up choosing the open stateid. This patch changes the client to select the stateid to use in the LAYOUTGET earlier, when we're searching for a usable layout segment. This way we can do it all while holding the i_lock the first time, and ensure that we serialize any LAYOUTGET call that uses a non-layout stateid. This also means a rework of how LAYOUTGET replies are handled, as we must now get the latest stateid if we want to retransmit in response to a retryable error. Most of those errors boil down to the fact that the layout state has changed in some fashion. Thus, what we really want to do is to re-search for a layout when it fails with a retryable error, so that we can avoid reissuing the RPC at all if possible. While the LAYOUTGET RPC is async, the initiating thread always waits for it to complete, so it's effectively synchronous anyway. Currently, when we need to retry a LAYOUTGET because of an error, we drive that retry via the rpc state machine. This means that once the call has been submitted, it runs until it completes. So, we must move the error handling for this RPC out of the rpc_call_done operation and into the caller. In order to handle errors like NFS4ERR_DELAY properly, we must also pass a pointer to the sliding timeout, which is now moved to the stack in pnfs_update_layout. The complicating errors are -NFS4ERR_RECALLCONFLICT and -NFS4ERR_LAYOUTTRYLATER, as those involve a timeout after which we give up and return NULL back to the caller. So, there is some special handling for those errors to ensure that the layers driving the retries can handle that appropriately. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 115 ++++++++++++++++---------------------- fs/nfs/nfs4trace.h | 10 +++- fs/nfs/pnfs.c | 144 +++++++++++++++++++++++++----------------------- fs/nfs/pnfs.h | 6 +- include/linux/errno.h | 1 + include/linux/nfs4.h | 2 + include/linux/nfs_xdr.h | 2 - 7 files changed, 136 insertions(+), 144 deletions(-) (limited to 'include/linux/nfs_xdr.h') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2a29f5d12aeb..62d67f040906 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -427,6 +427,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: + case -NFS4ERR_RECALLCONFLICT: exception->delay = 1; return 0; @@ -7847,40 +7848,34 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); struct nfs4_session *session = nfs4_get_session(server); - int ret; dprintk("--> %s\n", __func__); - /* Note the is a race here, where a CB_LAYOUTRECALL can come in - * right now covering the LAYOUTGET we are about to send. - * However, that is not so catastrophic, and there seems - * to be no way to prevent it completely. - */ - if (nfs41_setup_sequence(session, &lgp->args.seq_args, - &lgp->res.seq_res, task)) - return; - ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid, - NFS_I(lgp->args.inode)->layout, - &lgp->args.range, - lgp->args.ctx->state); - if (ret < 0) - rpc_exit(task, ret); + nfs41_setup_sequence(session, &lgp->args.seq_args, + &lgp->res.seq_res, task); + dprintk("<-- %s\n", __func__); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(task, &lgp->res.seq_res); + dprintk("<-- %s\n", __func__); +} + +static int +nfs4_layoutget_handle_exception(struct rpc_task *task, + struct nfs4_layoutget *lgp, struct nfs4_exception *exception) +{ struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; - struct nfs4_state *state = NULL; - unsigned long timeo, now, giveup; + int status = task->tk_status; dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); - if (!nfs41_sequence_done(task, &lgp->res.seq_res)) - goto out; - - switch (task->tk_status) { + switch (status) { case 0: goto out; @@ -7890,57 +7885,43 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) * retry go inband. */ case -NFS4ERR_LAYOUTUNAVAILABLE: - task->tk_status = -ENODATA; + status = -ENODATA; goto out; /* * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). */ case -NFS4ERR_BADLAYOUT: - goto out_overflow; + status = -EOVERFLOW; + goto out; /* * NFS4ERR_LAYOUTTRYLATER is a conflict with another client * (or clients) writing to the same RAID stripe except when * the minlength argument is 0 (see RFC5661 section 18.43.3). + * + * Treat it like we would RECALLCONFLICT -- we retry for a little + * while, and then eventually give up. */ case -NFS4ERR_LAYOUTTRYLATER: - if (lgp->args.minlength == 0) - goto out_overflow; - /* - * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall - * existing layout before getting a new one). - */ - case -NFS4ERR_RECALLCONFLICT: - timeo = rpc_get_timeout(task->tk_client); - giveup = lgp->args.timestamp + timeo; - now = jiffies; - if (time_after(giveup, now)) { - unsigned long delay; - - /* Delay for: - * - Not less then NFS4_POLL_RETRY_MIN. - * - One last time a jiffie before we give up - * - exponential backoff (time_now minus start_attempt) - */ - delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN, - min((giveup - now - 1), - now - lgp->args.timestamp)); - - dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", - __func__, delay); - rpc_delay(task, delay); - /* Do not call nfs4_async_handle_error() */ - goto out_restart; + if (lgp->args.minlength == 0) { + status = -EOVERFLOW; + goto out; } - break; + /* Fallthrough */ + case -NFS4ERR_RECALLCONFLICT: + nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT, + exception); + status = -ERECALLCONFLICT; + goto out; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: + exception->timeout = 0; spin_lock(&inode->i_lock); if (nfs4_stateid_match(&lgp->args.stateid, &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); /* If the open stateid was bad, then recover it. */ - state = lgp->args.ctx->state; + exception->state = lgp->args.ctx->state; break; } lo = NFS_I(inode)->layout; @@ -7958,20 +7939,16 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) pnfs_free_lseg_list(&head); } else spin_unlock(&inode->i_lock); - goto out_restart; + status = -EAGAIN; + goto out; } - if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN) - goto out_restart; + + status = nfs4_handle_exception(server, status, exception); + if (exception->retry) + status = -EAGAIN; out: dprintk("<-- %s\n", __func__); - return; -out_restart: - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; -out_overflow: - task->tk_status = -EOVERFLOW; - goto out; + return status; } static size_t max_response_pages(struct nfs_server *server) @@ -8040,7 +8017,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); @@ -8060,6 +8037,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) .flags = RPC_TASK_ASYNC, }; struct pnfs_layout_segment *lseg = NULL; + struct nfs4_exception exception = { .timeout = *timeout }; int status = 0; dprintk("--> %s\n", __func__); @@ -8073,7 +8051,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) return ERR_PTR(-ENOMEM); } lgp->args.layout.pglen = max_pages * PAGE_SIZE; - lgp->args.timestamp = jiffies; lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; @@ -8083,13 +8060,17 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) if (IS_ERR(task)) return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) - status = task->tk_status; + if (status == 0) { + status = nfs4_layoutget_handle_exception(task, lgp, &exception); + *timeout = exception.timeout; + } + trace_nfs4_layoutget(lgp->args.ctx, &lgp->args.range, &lgp->res.range, &lgp->res.stateid, status); + /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 2c8d05dae5b1..9c150b153782 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1520,6 +1520,8 @@ DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \ { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \ { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ + { PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" }, \ + { PNFS_UPDATE_LAYOUT_RETRY, "retrying" }, \ { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) TRACE_EVENT(pnfs_update_layout, @@ -1528,9 +1530,10 @@ TRACE_EVENT(pnfs_update_layout, u64 count, enum pnfs_iomode iomode, struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg, enum pnfs_update_layout_reason reason ), - TP_ARGS(inode, pos, count, iomode, lo, reason), + TP_ARGS(inode, pos, count, iomode, lo, lseg, reason), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, fileid) @@ -1540,6 +1543,7 @@ TRACE_EVENT(pnfs_update_layout, __field(enum pnfs_iomode, iomode) __field(int, layoutstateid_seq) __field(u32, layoutstateid_hash) + __field(long, lseg) __field(enum pnfs_update_layout_reason, reason) ), TP_fast_assign( @@ -1559,11 +1563,12 @@ TRACE_EVENT(pnfs_update_layout, __entry->layoutstateid_seq = 0; __entry->layoutstateid_hash = 0; } + __entry->lseg = (long)lseg; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "iomode=%s pos=%llu count=%llu " - "layoutstateid=%d:0x%08x (%s)", + "layoutstateid=%d:0x%08x lseg=0x%lx (%s)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1571,6 +1576,7 @@ TRACE_EVENT(pnfs_update_layout, (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, + __entry->lseg, show_pnfs_update_layout_reason(__entry->reason) ) ); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2a609af845fe..46339a7fb191 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -796,45 +796,18 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } -int -pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, - const struct pnfs_layout_range *range, - struct nfs4_state *open_state) -{ - int status = 0; - - dprintk("--> %s\n", __func__); - spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo)) { - status = -EAGAIN; - } else if (!nfs4_valid_open_stateid(open_state)) { - status = -EBADF; - } else if (list_empty(&lo->plh_segs) || - test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { - int seq; - - do { - seq = read_seqbegin(&open_state->seqlock); - nfs4_stateid_copy(dst, &open_state->stateid); - } while (read_seqretry(&open_state->seqlock, seq)); - } else - nfs4_stateid_copy(dst, &lo->plh_stateid); - spin_unlock(&lo->plh_inode->i_lock); - dprintk("<-- %s\n", __func__); - return status; -} - /* -* Get layout from server. -* for now, assume that whole file layouts are requested. -* arg->offset: 0 -* arg->length: all ones -*/ + * Get layout from server. + * for now, assume that whole file layouts are requested. + * arg->offset: 0 + * arg->length: all ones + */ static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, + nfs4_stateid *stateid, const struct pnfs_layout_range *range, - gfp_t gfp_flags) + long *timeout, gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -868,10 +841,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); + nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; lgp->cred = lo->plh_lc_cred; - return nfs4_proc_layoutget(lgp, gfp_flags); + return nfs4_proc_layoutget(lgp, timeout, gfp_flags); } static void pnfs_clear_layoutcommit(struct inode *inode, @@ -1511,27 +1485,30 @@ pnfs_update_layout(struct inode *ino, .offset = pos, .length = count, }; - unsigned pg_offset; + unsigned pg_offset, seq; struct nfs_server *server = NFS_SERVER(ino); struct nfs_client *clp = server->nfs_client; - struct pnfs_layout_hdr *lo; + struct pnfs_layout_hdr *lo = NULL; struct pnfs_layout_segment *lseg = NULL; + nfs4_stateid stateid; + long timeout = 0; + unsigned long giveup = jiffies + rpc_get_timeout(server->client); bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) { - trace_pnfs_update_layout(ino, pos, count, iomode, NULL, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NO_PNFS); goto out; } if (iomode == IOMODE_READ && i_size_read(ino) == 0) { - trace_pnfs_update_layout(ino, pos, count, iomode, NULL, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RD_ZEROLEN); goto out; } if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { - trace_pnfs_update_layout(ino, pos, count, iomode, NULL, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_MDSTHRESH); goto out; } @@ -1542,14 +1519,14 @@ lookup_again: lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { spin_unlock(&ino->i_lock); - trace_pnfs_update_layout(ino, pos, count, iomode, NULL, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NOMEM); goto out; } /* Do we even need to bother with this? */ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { - trace_pnfs_update_layout(ino, pos, count, iomode, lo, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BULK_RECALL); dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; @@ -1557,14 +1534,34 @@ lookup_again: /* if LAYOUTGET already failed once we don't try again */ if (pnfs_layout_io_test_failed(lo, iomode)) { - trace_pnfs_update_layout(ino, pos, count, iomode, lo, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); goto out_unlock; } - first = list_empty(&lo->plh_segs); - if (first) { - /* The first layoutget for the file. Need to serialize per + lseg = pnfs_find_lseg(lo, &arg); + if (lseg) { + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_FOUND_CACHED); + goto out_unlock; + } + + if (!nfs4_valid_open_stateid(ctx->state)) { + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_INVALID_OPEN); + goto out_unlock; + } + + /* + * Choose a stateid for the LAYOUTGET. If we don't have a layout + * stateid, or it has been invalidated, then we must use the open + * stateid. + */ + if (lo->plh_stateid.seqid == 0 || + test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + + /* + * The first layoutget for the file. Need to serialize per * RFC 5661 Errata 3208. */ if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, @@ -1573,18 +1570,17 @@ lookup_again: wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, TASK_UNINTERRUPTIBLE); pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); goto lookup_again; } + + first = true; + do { + seq = read_seqbegin(&ctx->state->seqlock); + nfs4_stateid_copy(&stateid, &ctx->state->stateid); + } while (read_seqretry(&ctx->state->seqlock, seq)); } else { - /* Check to see if the layout for the given range - * already exists - */ - lseg = pnfs_find_lseg(lo, &arg); - if (lseg) { - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - PNFS_UPDATE_LAYOUT_FOUND_CACHED); - goto out_unlock; - } + nfs4_stateid_copy(&stateid, &lo->plh_stateid); } /* @@ -1599,15 +1595,17 @@ lookup_again: pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, PNFS_UPDATE_LAYOUT_RETRY); goto lookup_again; } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETURN); goto out_put_layout_hdr; } if (pnfs_layoutgets_blocked(lo)) { - trace_pnfs_update_layout(ino, pos, count, iomode, lo, + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); goto out_unlock; } @@ -1632,26 +1630,36 @@ lookup_again: if (arg.length != NFS4_MAX_UINT64) arg.length = PAGE_ALIGN(arg.length); - lseg = send_layoutget(lo, ctx, &arg, gfp_flags); + lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); if (IS_ERR(lseg)) { - if (lseg == ERR_PTR(-EAGAIN)) { + switch(PTR_ERR(lseg)) { + case -ERECALLCONFLICT: + if (time_after(jiffies, giveup)) + lseg = NULL; + /* Fallthrough */ + case -EAGAIN: + pnfs_put_layout_hdr(lo); if (first) pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - goto lookup_again; - } - - if (!nfs_error_is_fatal(PTR_ERR(lseg))) { - pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); - lseg = NULL; + if (lseg) { + trace_pnfs_update_layout(ino, pos, count, + iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + /* Fallthrough */ + default: + if (!nfs_error_is_fatal(PTR_ERR(lseg))) { + pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + lseg = NULL; + } } } else { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); } atomic_dec(&lo->plh_outstanding); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); out_put_layout_hdr: if (first) pnfs_clear_first_layoutget(lo); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 971068b58647..f9f3331bef49 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -228,7 +228,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ @@ -260,10 +260,6 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier); -int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, - struct pnfs_layout_hdr *lo, - const struct pnfs_layout_range *range, - struct nfs4_state *open_state); int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, const struct pnfs_layout_range *recall_range, diff --git a/include/linux/errno.h b/include/linux/errno.h index 89627b9187f9..7ce9fb1b7d28 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -28,5 +28,6 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ +#define ERECALLCONFLICT 530 /* conflict with recalled state */ #endif diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e1692c96cbc8..bfed6b367350 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -637,7 +637,9 @@ enum pnfs_update_layout_reason { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, PNFS_UPDATE_LAYOUT_FOUND_CACHED, PNFS_UPDATE_LAYOUT_RETURN, + PNFS_UPDATE_LAYOUT_RETRY, PNFS_UPDATE_LAYOUT_BLOCKED, + PNFS_UPDATE_LAYOUT_INVALID_OPEN, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e70ed54dad94..ccb2928a0e64 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,7 +233,6 @@ struct nfs4_layoutget_args { struct inode *inode; struct nfs_open_context *ctx; nfs4_stateid stateid; - unsigned long timestamp; struct nfs4_layoutdriver_data layout; }; @@ -251,7 +250,6 @@ struct nfs4_layoutget { struct nfs4_layoutget_res res; struct rpc_cred *cred; gfp_t gfp_flags; - long timeout; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3-55-g7522