From b0c6108ecf64fce3987f80c1610472a56d480d9a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 May 2018 10:55:01 -0400 Subject: nfs_instantiate(): prevent multiple aliases for directory inode Since NFS allows open-by-fhandle, we have to cope with the possibility of mkdir vs. open-by-guessed-handle races. A local filesystem could decide what the inumber of the new object will be and insert a locked inode with that inumber into icache _before_ the on-disk data structures begin to look good and unlock it only once it has a dentry alias, so that open-by-handle coming first would quietly fail and mkdir coming first would have open-by-handle grab its dentry. For NFS it's a non-starter - the icache key is server-supplied fhandle and we do not get that until the object has been fully created on server. We really have to deal with the possibility that open-by-handle gets the in-core inode and attaches a dentry to it before mkdir does. Solution: let nfs_mkdir() use d_splice_alias() to catch those. We can * get an error. Just return it to our caller. * get NULL - no preexisting dentry aliases, we'd just done what d_add() would've done. Success. * get a reference to preexisting alias. In that case the alias had been moved in place of nfs_mkdir() argument (and hashed there), while nfs_mkdir() argument is left unhashed negative. Which is just fine for ->mkdir() callers, all we need is to release the reference we'd got from d_splice_alias() and report success. Cc: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7a9c14426855..df6fd4e5b068 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1641,6 +1641,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); struct inode *inode; + struct dentry *d; int error = -EACCES; d_drop(dentry); @@ -1662,10 +1663,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, goto out_error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); - error = PTR_ERR(inode); - if (IS_ERR(inode)) + d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + error = PTR_ERR(d); goto out_error; - d_add(dentry, inode); + } + dput(d); out: dput(parent); return 0; -- cgit v1.2.3-55-g7522 From c971e6a006175bd0f195c6346c4e8bc4089bec00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 May 2018 18:27:19 -0400 Subject: kill d_instantiate_no_diralias() The only user is fuse_create_new_entry(), and there it's used to mitigate the same mkdir/open-by-handle race as in nfs_mkdir(). The same solution applies - unhash the mkdir argument, then call d_splice_alias() and if that returns a reference to preexisting alias, dput() and report success. ->mkdir() argument left unhashed negative with the preexisting alias moved in the right place is just fine from the ->mkdir() callers point of view. Cc: Miklos Szeredi Signed-off-by: Al Viro --- fs/dcache.c | 27 --------------------------- fs/fuse/dir.c | 15 +++++++++++---- include/linux/dcache.h | 1 - 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 0e8e5de3c48a..a7d9e7a4c283 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1899,33 +1899,6 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_instantiate_new); -/** - * d_instantiate_no_diralias - instantiate a non-aliased dentry - * @entry: dentry to complete - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. If a directory alias is found, then - * return an error (and drop inode). Together with d_materialise_unique() this - * guarantees that a directory inode may never have more than one alias. - */ -int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) -{ - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); - - security_d_instantiate(entry, inode); - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { - spin_unlock(&inode->i_lock); - iput(inode); - return -EBUSY; - } - __d_instantiate(entry, inode); - spin_unlock(&inode->i_lock); - - return 0; -} -EXPORT_SYMBOL(d_instantiate_no_diralias); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 56231b31f806..4bbae6ac75c3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, { struct fuse_entry_out outarg; struct inode *inode; + struct dentry *d; int err; struct fuse_forget_link *forget; @@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, } kfree(forget); - err = d_instantiate_no_diralias(entry, inode); - if (err) - return err; + d_drop(entry); + d = d_splice_alias(inode, entry); + if (IS_ERR(d)) + return PTR_ERR(d); - fuse_change_entry_timeout(entry, &outarg); + if (d) { + fuse_change_entry_timeout(d, &outarg); + dput(d); + } else { + fuse_change_entry_timeout(entry, &outarg); + } fuse_invalidate_attr(dir); return 0; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 66c6e17e61e5..0b83629a3d8f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -227,7 +227,6 @@ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); -extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); -- cgit v1.2.3-55-g7522 From c2b6d621c4ffe9936adf7a55c8b1c769672c306f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Jun 2018 15:53:17 -0400 Subject: new primitive: discard_new_inode() We don't want open-by-handle picking half-set-up in-core struct inode from e.g. mkdir() having failed halfway through. In other words, we don't want such inodes returned by iget_locked() on their way to extinction. However, we can't just have them unhashed - otherwise open-by-handle immediately *after* that would've ended up creating a new in-core inode over the on-disk one that is in process of being freed right under us. Solution: new flag (I_CREATING) set by insert_inode_locked() and removed by unlock_new_inode() and a new primitive (discard_new_inode()) to be used by such halfway-through-setup failure exits instead of unlock_new_inode() / iput() combinations. That primitive unlocks new inode, but leaves I_CREATING in place. iget_locked() treats finding an I_CREATING inode as failure (-ESTALE, once we sort out the error propagation). insert_inode_locked() treats the same as instant -EBUSY. ilookup() treats those as icache miss. [Fix by Dan Carpenter folded in] Signed-off-by: Al Viro --- fs/dcache.c | 2 +- fs/inode.c | 45 +++++++++++++++++++++++++++++++++++++++++---- include/linux/fs.h | 6 +++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index a7d9e7a4c283..11b753d29409 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); diff --git a/fs/inode.c b/fs/inode.c index 2c300e981796..6cd2e7ba9f4d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -804,6 +804,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -831,6 +835,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode) lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); +void discard_new_inode(struct inode *inode) +{ + lockdep_annotate_inode_mutex_key(inode); + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); + iput(inode); +} +EXPORT_SYMBOL(discard_new_inode); + /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * @@ -1039,6 +1060,8 @@ again: * Use the old inode instead of the preallocated one. */ spin_unlock(&inode_hash_lock); + if (IS_ERR(old)) + return NULL; wait_on_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); @@ -1128,6 +1151,8 @@ again: inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1165,6 +1190,8 @@ again: */ spin_unlock(&inode_hash_lock); destroy_inode(inode); + if (IS_ERR(old)) + return NULL; inode = old; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, inode = find_inode(sb, head, test, data); spin_unlock(&inode_hash_lock); - return inode; + return IS_ERR(inode) ? NULL : inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1338,6 +1365,8 @@ again: spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode) } if (likely(!old)) { spin_lock(&inode->i_lock); - inode->i_state |= I_NEW; + inode->i_state |= I_NEW | I_CREATING; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } + if (unlikely(old->i_state & I_CREATING)) { + spin_unlock(&old->i_lock); + spin_unlock(&inode_hash_lock); + return -EBUSY; + } __iget(old); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); @@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode *old = inode_insert5(inode, hashval, test, NULL, data); + struct inode *old; + + inode->i_state |= I_CREATING; + old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { iput(old); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..a42600565925 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * and work dirs among overlayfs mounts. * + * I_CREATING New object's inode in the middle of setting up. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) -#define I_OVL_INUSE (1 << 14) +#define I_OVL_INUSE (1 << 14) +#define I_CREATING (1 << 15) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode); static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); +extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); -- cgit v1.2.3-55-g7522 From 32955c5422a8a5460bbefe2a6fc51eadcafff4c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 May 2018 12:20:05 -0400 Subject: btrfs: switch to discard_new_inode() Make sure that no partially set up inodes can be returned by open-by-handle. Signed-off-by: Al Viro --- fs/btrfs/inode.c | 106 ++++++++++++++++++++----------------------------------- 1 file changed, 39 insertions(+), 67 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9482f0db9d0..9382e0881900 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6335,8 +6335,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location->type = BTRFS_INODE_ITEM_KEY; ret = btrfs_insert_inode_locked(inode); - if (ret < 0) + if (ret < 0) { + iput(inode); goto fail; + } path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); @@ -6395,12 +6397,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; fail_unlock: - unlock_new_inode(inode); + discard_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; btrfs_free_path(path); - iput(inode); return ERR_PTR(ret); } @@ -6505,7 +6506,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; @@ -6527,6 +6527,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -6541,31 +6542,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - goto out_unlock_inode; - } else { - btrfs_update_inode(trans, root, inode); - d_instantiate_new(dentry, inode); - } + if (err) + goto out_unlock; + + btrfs_update_inode(trans, root, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -6575,7 +6569,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; - int drop_inode_on_err = 0; int err; u64 objectid; u64 index = 0; @@ -6598,9 +6591,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } - drop_inode_on_err = 1; /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -6613,33 +6606,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_update_inode(trans, root, inode); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); if (err) - goto out_unlock_inode; + goto out_unlock; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (err && drop_inode_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6748,6 +6736,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_fail; } @@ -6758,34 +6747,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail_inode; + goto out_fail; btrfs_i_size_write(BTRFS_I(inode), 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail_inode; + goto out_fail; err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail_inode; + goto out_fail; d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: btrfs_end_transaction(trans); - if (drop_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_fail_inode: - unlock_new_inode(inode); - goto out_fail; } static noinline int uncompress_inline(struct btrfs_path *path, @@ -10112,7 +10097,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, struct btrfs_key key; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; int name_len; @@ -10145,6 +10129,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, objectid, S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -10161,12 +10146,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - goto out_unlock_inode; + goto out_unlock; } key.objectid = btrfs_ino(BTRFS_I(inode)); key.offset = 0; @@ -10176,7 +10161,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize); if (err) { btrfs_free_path(path); - goto out_unlock_inode; + goto out_unlock; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -10208,26 +10193,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (!err) err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - drop_inode = 1; - goto out_unlock_inode; - } + if (err) + goto out_unlock; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -10436,14 +10414,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) ret = btrfs_init_inode_security(trans, inode, dir, NULL); if (ret) - goto out_inode; + goto out; ret = btrfs_update_inode(trans, root, inode); if (ret) - goto out_inode; + goto out; ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) - goto out_inode; + goto out; /* * We set number of links to 0 in btrfs_new_inode(), and here we set @@ -10453,21 +10431,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() */ set_nlink(inode, 1); - unlock_new_inode(inode); d_tmpfile(dentry, inode); + unlock_new_inode(inode); mark_inode_dirty(inode); - out: btrfs_end_transaction(trans); - if (ret) - iput(inode); + if (ret && inode) + discard_new_inode(inode); btrfs_btree_balance_dirty(fs_info); return ret; - -out_inode: - unlock_new_inode(inode); - goto out; - } __attribute__((const)) -- cgit v1.2.3-55-g7522 From dd54992776ebb44519ba4cd69145c4f19d166ddb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 May 2018 12:22:50 -0400 Subject: ufs: switch to discard_new_inode() we don't want open-by-handle to pick an in-core inode that has failed setup halfway through. Signed-off-by: Al Viro --- fs/ufs/ialloc.c | 3 +-- fs/ufs/namei.c | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index e1ef0f0a1353..02c0a4be4212 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -343,8 +343,7 @@ cg_found: fail_remove_inode: mutex_unlock(&sbi->s_lock); clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d5f43ba76c59..9ef40f100415 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); return err; -- cgit v1.2.3-55-g7522 From 5c1a68a358f94b9ac2e33183327bc04f207feed2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 May 2018 12:25:39 -0400 Subject: udf: switch to discard_new_inode() we don't want open-by-handle to pick an in-core inode that has failed setup halfway through. Signed-off-by: Al Viro --- fs/udf/namei.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c586026508db..061d049c2620 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -608,8 +608,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (unlikely(!fi)) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -700,8 +699,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } set_nlink(inode, 2); @@ -719,8 +717,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!fi) { clear_nlink(inode); mark_inode_dirty(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -1047,8 +1044,7 @@ out: out_no_entry: up_write(&iinfo->i_data_sem); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } -- cgit v1.2.3-55-g7522 From 2e5afe54e0cd6fce79b51ca547caf08a990ad56d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 May 2018 18:29:56 -0400 Subject: ext2: make sure that partially set up inodes won't be returned by ext2_iget() Signed-off-by: Al Viro --- fs/ext2/ialloc.c | 3 +-- fs/ext2/namei.c | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 6484199b35d1..5c3d7b7e4975 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -611,8 +611,7 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return ERR_PTR(err); fail: diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 152453a91877..0c26dcc5d850 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -192,8 +191,7 @@ out: out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); goto out; } @@ -261,8 +259,7 @@ out: out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); goto out; -- cgit v1.2.3-55-g7522 From a6cbedfa8783b42b9272c05297865bdb501005cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Jun 2018 11:59:37 -0400 Subject: jfs: switch to discard_new_inode() we don't want open-by-handle to pick an in-core inode that has failed setup halfway through. Signed-off-by: Al Viro --- fs/jfs/jfs_inode.c | 8 ++++---- fs/jfs/namei.c | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 5e9b7bb3aabf..96732c24b054 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - rc = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } jfs_inode = JFS_IP(inode); @@ -141,9 +140,10 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); + discard_new_inode(inode); + return ERR_PTR(rc); + fail_put: iput(inode); -fail: return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 56c3fcbfe80e..14528c0ffe63 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } -- cgit v1.2.3-55-g7522 From e950564b97fd0f541b02eb207685d0746f5ecf29 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 24 Jul 2018 15:01:55 +0200 Subject: vfs: don't evict uninitialized inode iput() ends up calling ->evict() on new inode, which is not yet initialized by owning fs. So use destroy_inode() instead. Add to sb->s_inodes list only if inode is not in I_CREATING state (meaning that it wasn't allocated with new_inode(), which already does the insertion). Reported-by: Al Viro Signed-off-by: Miklos Szeredi Fixes: 80ea09a002bf ("vfs: factor out inode_insert5()") Signed-off-by: Al Viro --- fs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 6cd2e7ba9f4d..e44a97584158 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1050,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; + bool creating = inode->i_state & I_CREATING; again: spin_lock(&inode_hash_lock); @@ -1083,6 +1084,8 @@ again: inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); + if (!creating) + inode_sb_list_add(inode); unlock: spin_unlock(&inode_hash_lock); @@ -1117,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, struct inode *inode = ilookup5(sb, hashval, test, data); if (!inode) { - struct inode *new = new_inode(sb); + struct inode *new = alloc_inode(sb); if (new) { + new->i_state = 0; inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) - iput(new); + destroy_inode(new); } } return inode; -- cgit v1.2.3-55-g7522 From 5bef915104f32c9d0bb5df6e86a98e31cb524e9a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Jun 2018 19:36:57 -0400 Subject: new helper: inode_fake_hash() open-coded in a quite a few places... Signed-off-by: Al Viro --- fs/hfs/inode.c | 2 +- fs/jfs/jfs_imap.c | 8 +------- fs/jfs/super.c | 2 +- fs/xfs/xfs_iops.c | 2 +- include/linux/fs.h | 11 +++++++++++ 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2a16111d312f..a2dfa1b2a89c 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, HFS_I(inode)->rsrc_inode = dir; HFS_I(dir)->rsrc_inode = inode; igrab(dir); - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mark_inode_dirty(inode); dont_mount(dentry); out: diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f36ef68905a7..93e8c590ff5c 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* release the page */ release_metapage(mp); - /* - * __mark_inode_dirty expects inodes to be hashed. Since we don't - * want special inodes in the fileset inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. - */ - hlist_add_fake(&ip->i_hash); + inode_fake_hash(ip); return (ip); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b6..5403ece57dba 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = 0; inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 0fa29f39d658..3a75de777843 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1253,7 +1253,7 @@ xfs_setup_inode( inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); diff --git a/include/linux/fs.h b/include/linux/fs.h index a42600565925..43941e230e2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -684,6 +684,17 @@ static inline int inode_unhashed(struct inode *inode) return hlist_unhashed(&inode->i_hash); } +/* + * __mark_inode_dirty expects inodes to be hashed. Since we don't + * want special inodes in the fileset inode space, we make them + * appear hashed, but do not put on any lists. hlist_del() + * will work fine and require no locking. + */ +static inline void inode_fake_hash(struct inode *inode) +{ + hlist_add_fake(&inode->i_hash); +} + /* * inode->i_mutex nesting subclasses for the lock validator: * -- cgit v1.2.3-55-g7522 From d8e78da8682028ad53d040339a2b9d6fb6092d63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jun 2018 03:15:49 -0400 Subject: adfs: don't put inodes into icache We never look them up in there; inode_fake_hash() will make them appear hashed for mark_inode_dirty() purposes. And don't leave them around until memory pressure kicks them out - we never look them up again. Signed-off-by: Al Viro --- fs/adfs/inode.c | 2 +- fs/adfs/super.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index c836c425ca94..e91028d4340a 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -287,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->mmu_private = inode->i_size; } - insert_inode_hash(inode); + inode_fake_hash(inode); out: return inode; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 71fa525d63a0..7e099a7a4eb1 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -291,6 +291,7 @@ static void destroy_inodecache(void) static const struct super_operations adfs_sops = { .alloc_inode = adfs_alloc_inode, .destroy_inode = adfs_destroy_inode, + .drop_inode = generic_delete_inode, .write_inode = adfs_write_inode, .put_super = adfs_put_super, .statfs = adfs_statfs, -- cgit v1.2.3-55-g7522 From c7b15a8657da7f8d11269c7cc3d8beef10d26b43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jun 2018 14:32:04 -0400 Subject: jfs: don't bother with make_bad_inode() in ialloc() We hit that when inumber allocation has failed. In that case the in-core inode is not hashed and since its ->i_nlink is 1 the only place where jfs checks is_bad_inode() won't be reached. Signed-off-by: Al Viro --- fs/jfs/jfs_inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 96732c24b054..4572b7cf183d 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -69,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); - if (rc == -EIO) - make_bad_inode(inode); goto fail_put; } -- cgit v1.2.3-55-g7522