diff options
Diffstat (limited to 'fs')
196 files changed, 2769 insertions, 1902 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index eb3589edf485..0576eaeb60b9 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -239,13 +239,13 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler, } static int v9fs_xattr_set_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { int retval; struct posix_acl *acl; struct v9fs_session_info *v9ses; - struct inode *inode = d_inode(dentry); v9ses = v9fs_dentry2v9ses(dentry); /* diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b84c291ba1eb..d7b78d531e63 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { - fid = v9fs_fid_clone(file->f_path.dentry); + fid = v9fs_fid_clone(file_dentry(file)); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(file->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); @@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(filp->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(filp)); if (IS_ERR(fid)) { retval = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4645c515262..e2e7c749925a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -853,7 +853,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct p9_fid *fid, *inode_fid; struct dentry *res = NULL; - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a34702c998f5..1b51eaa5e2dd 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -254,7 +254,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct posix_acl *pacl = NULL, *dacl = NULL; struct dentry *res = NULL; - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 18c62bae9591..a6bd349bab23 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -147,8 +147,9 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler, } static int v9fs_xattr_handler_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { const char *full_name = xattr_full_name(handler, name); diff --git a/fs/affs/super.c b/fs/affs/super.c index 2a6713b6b9f4..d6384863192c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -528,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) char *prefix = NULL; new_opts = kstrdup(data, GFP_KERNEL); - if (!new_opts) + if (data && !new_opts) return -ENOMEM; pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); @@ -546,7 +546,8 @@ affs_remount(struct super_block *sb, int *flags, char *data) } flush_delayed_work(&sbi->sb_work); - replace_mount_options(sb, new_opts); + if (new_opts) + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/write.c b/fs/afs/write.c index 65de439bdc4f..14d506efd1aa 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -643,10 +643,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) return 0; result = generic_file_write_iter(iocb, from); - if (IS_ERR_VALUE(result)) { - _leave(" = %zd", result); - return result; - } _leave(" = %zd", result); return result; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f0d268b97d19..a439548de785 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -70,9 +70,13 @@ struct autofs_info { }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ -#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered +#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is - * not permitted + * not permitted. If it progresses to + * actual expiry attempt, the flag is + * not cleared when EXPIRING is set - + * in that case it gets cleared only + * when it comes to clearing EXPIRING. */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 9510d8d2e9cd..b493909e7492 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, if (ino->flags & AUTOFS_INF_PENDING) goto out; if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; } out: spin_unlock(&sbi->fs_lock); @@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, while ((dentry = get_next_positive_subdir(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_NO_RCU) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) expired = NULL; else expired = should_expire(dentry, mnt, timeout, how); @@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, continue; } ino = autofs4_dentry_ino(expired); - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); @@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, goto found; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; if (expired != dentry) dput(expired); spin_unlock(&sbi->fs_lock); @@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, found: pr_debug("returning %p %pd\n", expired, expired); ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&sbi->lookup_lock); - spin_lock(&expired->d_parent->d_lock); - spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_child); - spin_unlock(&expired->d_lock); - spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&sbi->lookup_lock); return expired; } @@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) int status; /* Block on any pending expire */ - if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))) + if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) return 0; if (rcu_walk) return -ECHILD; @@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb, ino = autofs4_dentry_ino(dentry); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 78bd80298528..3767f6641af1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) */ struct inode *inode; - if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) return 0; if (d_mountpoint(dentry)) return 0; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 0146d911f468..631f1554c87b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi, set_fs(KERNEL_DS); mutex_lock(&sbi->pipe_mutex); - wr = __vfs_write(file, data, bytes, &file->f_pos); - while (bytes && wr) { + while (bytes) { + wr = __vfs_write(file, data, bytes, &file->f_pos); + if (wr <= 0) + break; data += wr; bytes -= wr; - wr = __vfs_write(file, data, bytes, &file->f_pos); } mutex_unlock(&sbi->pipe_mutex); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 72e35b721608..3ba385eaa26e 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -100,8 +100,8 @@ static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs) return -EIO; } -static int bad_inode_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +static int bad_inode_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { return -EIO; } diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 2fab9f130e51..ae1b5404fced 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -127,12 +127,8 @@ static int set_brk(unsigned long start, unsigned long end) { start = PAGE_ALIGN(start); end = PAGE_ALIGN(end); - if (end > start) { - unsigned long addr; - addr = vm_brk(start, end - start); - if (BAD_ADDR(addr)) - return addr; - } + if (end > start) + return vm_brk(start, end - start); return 0; } @@ -275,7 +271,7 @@ static int load_aout_binary(struct linux_binprm * bprm) map_size = ex.a_text+ex.a_data; #endif error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) + if (error) return error; error = read_code(bprm->file, text_addr, pos, @@ -298,7 +294,7 @@ static int load_aout_binary(struct linux_binprm * bprm) if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - if (IS_ERR_VALUE(error)) + if (error) return error; read_code(bprm->file, N_TXTADDR(ex), fd_offset, @@ -382,7 +378,7 @@ static int load_aout_library(struct file *file) file); } retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - if (IS_ERR_VALUE(retval)) + if (retval) goto out; read_code(file, start_addr, N_TXTOFF(ex), @@ -402,9 +398,8 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - error = vm_brk(start_addr + len, bss - len); - retval = error; - if (error != start_addr + len) + retval = vm_brk(start_addr + len, bss - len); + if (retval) goto out; } retval = 0; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 938fc4ede764..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -96,10 +96,9 @@ static int set_brk(unsigned long start, unsigned long end) start = ELF_PAGEALIGN(start); end = ELF_PAGEALIGN(end); if (end > start) { - unsigned long addr; - addr = vm_brk(start, end - start); - if (BAD_ADDR(addr)) - return addr; + int error = vm_brk(start, end - start); + if (error) + return error; } current->mm->start_brk = current->mm->brk = end; return 0; @@ -629,7 +628,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, /* Map the last of the bss segment */ error = vm_brk(elf_bss, last_bss - elf_bss); - if (BAD_ADDR(error)) + if (error) goto out; } @@ -1178,7 +1177,7 @@ static int load_elf_library(struct file *file) bss = eppnt->p_memsz + eppnt->p_vaddr; if (bss > len) { error = vm_brk(len, bss - len); - if (BAD_ADDR(error)) + if (error) goto out_free_ph; } error = 0; @@ -2276,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index f723cd3a455c..caf9e39bb82b 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -337,7 +337,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) "(%d != %d)", (unsigned) r, curid, id); goto failed; } else if ( ! p->lib_list[id].loaded && - IS_ERR_VALUE(load_flat_shared_library(id, p))) { + load_flat_shared_library(id, p) < 0) { printk("BINFMT_FLAT: failed to load library %d", id); goto failed; } @@ -837,7 +837,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) res = prepare_binprm(&bprm); - if (!IS_ERR_VALUE(res)) + if (!res) res = load_flat_file(&bprm, libs, id, NULL); abort_creds(bprm.cred); @@ -883,7 +883,7 @@ static int load_flat_binary(struct linux_binprm * bprm) stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); - if (IS_ERR_VALUE(res)) + if (res < 0) return res; /* Update data segment pointers for all libraries */ diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index d3090187fd76..8bb3509099e8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1939,7 +1939,7 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, * from ipath->fspath->val[i]. * when it returns, there are ipath->fspath->elem_cnt number of paths available * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the - * number of missed paths in recored in ipath->fspath->elem_missed, otherwise, + * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise, * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would * have been needed to return all paths. */ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 1da5753d886d..4919aedb5fc1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -313,7 +313,7 @@ struct btrfs_dio_private { struct bio *dio_bio; /* - * The original bio may be splited to several sub-bios, this is + * The original bio may be split to several sub-bios, this is * done during endio of sub-bios */ int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 516e19d1d202..7706c8dc5fa6 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1939,7 +1939,7 @@ again: /* * Clear all references of this block. Do not free * the block itself even if is not referenced anymore - * because it still carries valueable information + * because it still carries valuable information * like whether it was ever written and IO completed. */ list_for_each_entry_safe(l, tmp, &block->ref_to_list, @@ -2645,7 +2645,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. */ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index decd0a3f5d61..a85cf7d23309 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -156,7 +156,7 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) /* * RCU really hurts here, we could free up the root node because - * it was cow'ed but we may not get the new root node yet so do + * it was COWed but we may not get the new root node yet so do * the inc_not_zero dance and if it doesn't work then * synchronize_rcu and try again. */ @@ -955,7 +955,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf) { /* - * Tree blocks not in refernece counted trees and tree roots + * Tree blocks not in reference counted trees and tree roots * are never shared. If a block was allocated after the last * snapshot and the block was not allocated by tree relocation, * we know the block is not shared. @@ -1270,7 +1270,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, /* * tm is a pointer to the first operation to rewind within eb. then, all - * previous operations will be rewinded (until we reach something older than + * previous operations will be rewound (until we reach something older than * time_seq). */ static void @@ -1345,7 +1345,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } /* - * Called with eb read locked. If the buffer cannot be rewinded, the same buffer + * Called with eb read locked. If the buffer cannot be rewound, the same buffer * is returned. If rewind operations happen, a fresh buffer is returned. The * returned buffer is always read-locked. If the returned buffer is not the * input buffer, the lock on the input buffer is released and the input buffer @@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); + eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start, + eb->len); if (!eb_rewin) { btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); @@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } else if (old_root) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - eb = alloc_dummy_extent_buffer(root->fs_info, logical); + eb = alloc_dummy_extent_buffer(root->fs_info, logical, + root->nodesize); } else { btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); @@ -1516,7 +1518,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, * 3) the root is not forced COW. * * What is forced COW: - * when we create snapshot during commiting the transaction, + * when we create snapshot during committing the transaction, * after we've finished coping src root, we must COW the shared * block to ensure the metadata consistency. */ @@ -1531,7 +1533,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, /* * cows a single block, see __btrfs_cow_block for the real work. - * This version of it has extra checks so that a block isn't cow'd more than + * This version of it has extra checks so that a block isn't COWed more than * once per transaction, as long as it hasn't been written yet */ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, @@ -1552,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -1783,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb, if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); - } else { + } else if (err == 1) { read_extent_buffer(eb, &unaligned, offset, sizeof(unaligned)); tmp = &unaligned; + } else { + return err; } } else { @@ -2510,6 +2515,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); + } else { + ret = PTR_ERR(tmp); } return ret; } @@ -2773,8 +2780,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the @@ -2823,6 +2832,8 @@ cow_done: } ret = key_search(b, key, level, &prev_cmp, &slot); + if (ret < 0) + goto done; if (level != 0) { int dec = 0; @@ -2986,7 +2997,7 @@ again: btrfs_unlock_up_safe(p, level + 1); /* - * Since we can unwind eb's we want to do a real search every + * Since we can unwind ebs we want to do a real search every * time. */ prev_cmp = -1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ddcc58f03c79..4274a7bfdaed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -89,7 +89,7 @@ static const int btrfs_csum_sizes[] = { 4 }; /* four bytes for CRC32 */ #define BTRFS_EMPTY_DIR_SIZE 0 -/* spefic to btrfs_map_block(), therefore not in include/linux/blk_types.h */ +/* specific to btrfs_map_block(), therefore not in include/linux/blk_types.h */ #define REQ_GET_READ_MIRRORS (1 << 30) /* ioprio of readahead is set to idle */ @@ -431,7 +431,7 @@ struct btrfs_space_info { * bytes_pinned does not reflect the bytes that will be pinned once the * delayed refs are flushed, so this counter is inc'ed every time we * call btrfs_free_extent so it is a realtime count of what will be - * freed once the transaction is committed. It will be zero'ed every + * freed once the transaction is committed. It will be zeroed every * time the transaction commits. */ struct percpu_counter total_bytes_pinned; @@ -1401,7 +1401,7 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token) token->kaddr = NULL; } -/* some macros to generate set/get funcs for the struct fields. This +/* some macros to generate set/get functions for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: */ @@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait); + unsigned long count, u64 transid, int wait); int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 61561c2a3f96..d3aaabbfada0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) return 0; } -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list) +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) - return; + return false; + + /* + * We can only do one readdir with delayed items at a time because of + * item->readdir_list. + */ + inode_unlock_shared(inode); + inode_lock(inode); mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); @@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, * requeue or dequeue this delayed node. */ atomic_dec(&delayed_node->refs); + + return true; } -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list) +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_item *curr, *next; @@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, if (atomic_dec_and_test(&curr->refs)) kfree(curr); } + + /* + * The VFS is going to do up_read(), so we need to downgrade back to a + * read lock. + */ + downgrade_write(&inode->i_rwsem); } int btrfs_should_delete_dir_index(struct list_head *del_list, diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0167853c84ae..2495b3d4075f 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); void btrfs_destroy_delayed_inodes(struct btrfs_root *root); /* Used for readdir() */ -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list); -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list); +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index c24b653c7343..5fca9534a271 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -188,7 +188,7 @@ struct btrfs_delayed_ref_root { /* * To make qgroup to skip given root. - * This is for snapshot, as btrfs_qgroup_inherit() will manully + * This is for snapshot, as btrfs_qgroup_inherit() will manually * modify counters for snapshot and its source, so we should skip * the snapshot in new_root/old_roots or it will get calculated twice */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 85f12e6e28d2..63ef9cdf0144 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -450,7 +450,7 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_root *root, } /* - * blocked until all flighting bios are finished. + * blocked until all in-flight bios operations are finished. */ static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 91d123938cef..60ce1190307b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -384,7 +384,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, /* * Things reading via commit roots that don't have normal protection, * like send, can have a really old block in cache that may point at a - * block that has been free'd and re-allocated. So don't clear uptodate + * block that has been freed and re-allocated. So don't clear uptodate * if we find an eb that is under IO (dirty/writeback) because we could * end up reading in the stale data and then writing it back out and * making everybody very sad. @@ -418,7 +418,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb) /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space - * is filled with zeros and is included in the checkum. + * is filled with zeros and is included in the checksum. */ crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); @@ -600,7 +600,7 @@ static noinline int check_leaf(struct btrfs_root *root, /* * Check to make sure that we don't point outside of the leaf, - * just incase all the items are consistent to eachother, but + * just in case all the items are consistent to each other, but * all point outside of the leaf. */ if (btrfs_item_end_nr(leaf, slot) > @@ -1098,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); @@ -1114,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); @@ -1147,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr); + return alloc_test_extent_buffer(root->fs_info, bytenr, + root->nodesize); return alloc_extent_buffer(root->fs_info, bytenr); } @@ -1171,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { @@ -1314,14 +1315,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, root, NULL, 1); + /* We don't use the stripesize in selftest, set it as sectorsize */ + __setup_root(nodesize, sectorsize, sectorsize, root, NULL, + BTRFS_ROOT_TREE_OBJECTID); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1803,6 +1806,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. + */ + if (!root->fs_info->open) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -2517,7 +2527,6 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; - bool cleaner_mutex_locked = false; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2797,7 +2806,7 @@ int open_ctree(struct super_block *sb, nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); + stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); @@ -2996,13 +3005,6 @@ retry_root_backup: goto fail_sysfs; } - /* - * Hold the cleaner_mutex thread here so that we don't block - * for a long time on btrfs_recover_relocation. cleaner_kthread - * will wait for us to finish mounting the filesystem. - */ - mutex_lock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = true; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) @@ -3022,7 +3024,7 @@ retry_root_backup: } /* - * Mount does not set all options immediatelly, we can do it now and do + * Mount does not set all options immediately, we can do it now and do * not have to wait for transaction commit */ btrfs_apply_pending_changes(fs_info); @@ -3062,8 +3064,10 @@ retry_root_backup: ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; - /* We locked cleaner_mutex before creating cleaner_kthread. */ + + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); @@ -3071,8 +3075,6 @@ retry_root_backup: goto fail_qgroup; } } - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3186,10 +3188,6 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - if (cleaner_mutex_locked) { - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; - } btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: @@ -3255,7 +3253,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) btrfs_warn_rl_in_rcu(device->dev_root->fs_info, "lost page write due to IO error on %s", rcu_str_deref(device->name)); - /* note, we dont' set_buffer_write_io_error because we have + /* note, we don't set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ clear_buffer_uptodate(bh); @@ -4130,6 +4128,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + btrfs_err(fs_info, "bytes_used is too small %llu", + btrfs_super_bytes_used(sb)); + ret = -EINVAL; + } + if (!is_power_of_2(btrfs_super_stripesize(sb))) { + btrfs_err(fs_info, "invalid stripesize %u", + btrfs_super_stripesize(sb)); + ret = -EINVAL; + } if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", btrfs_super_num_devices(sb)); @@ -4367,7 +4375,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, if (ret) break; - clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); + clear_extent_bits(dirty_pages, start, end, mark); while (start <= end) { eb = btrfs_find_tree_block(root->fs_info, start); start += root->nodesize; @@ -4402,7 +4410,7 @@ again: if (ret) break; - clear_extent_dirty(unpin, start, end, GFP_NOFS); + clear_extent_dirty(unpin, start, end); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8e79d0070bcf..acba821499a9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, void btrfs_free_fs_root(struct btrfs_root *root); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -struct btrfs_root *btrfs_alloc_dummy_root(void); +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize); #endif /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9424864fd01a..82b912a293ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -231,9 +231,9 @@ static int add_excluded_extent(struct btrfs_root *root, { u64 end = start + num_bytes - 1; set_extent_bits(&root->fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE, GFP_NOFS); + start, end, EXTENT_UPTODATE); set_extent_bits(&root->fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE, GFP_NOFS); + start, end, EXTENT_UPTODATE); return 0; } @@ -246,9 +246,9 @@ static void free_excluded_extents(struct btrfs_root *root, end = start + cache->key.offset - 1; clear_extent_bits(&root->fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE, GFP_NOFS); + start, end, EXTENT_UPTODATE); clear_extent_bits(&root->fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE, GFP_NOFS); + start, end, EXTENT_UPTODATE); } static int exclude_super_stripes(struct btrfs_root *root, @@ -980,7 +980,7 @@ out_free: * event that tree block loses its owner tree's reference and do the * back refs conversion. * - * When a tree block is COW'd through a tree, there are four cases: + * When a tree block is COWed through a tree, there are four cases: * * The reference count of the block is one and the tree is the block's * owner tree. Nothing to do in this case. @@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, struct btrfs_bio *bbio = NULL; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are discarding. + */ + btrfs_bio_counter_inc_blocked(root->fs_info); /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(root->fs_info, REQ_DISCARD, bytenr, &num_bytes, &bbio, 0); @@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } btrfs_put_bbio(bbio); } + btrfs_bio_counter_dec(root->fs_info); if (actual_bytes) *actual_bytes = discarded_bytes; @@ -2595,7 +2601,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } /* - * Need to drop our head ref lock and re-aqcuire the + * Need to drop our head ref lock and re-acquire the * delayed ref lock and then re-check to make sure * nobody got added. */ @@ -2747,7 +2753,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) /* * We don't ever fill up leaves all the way so multiply by 2 just to be - * closer to what we're really going to want to ouse. + * closer to what we're really going to want to use. */ return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); } @@ -2829,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct async_delayed_refs { struct btrfs_root *root; + u64 transid; int count; int error; int sync; @@ -2844,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); + /* if the commit is already started, we don't need to wait here */ + if (btrfs_transaction_blocked(async->root->fs_info)) + goto done; + trans = btrfs_join_transaction(async->root); if (IS_ERR(trans)) { async->error = PTR_ERR(trans); @@ -2851,14 +2862,19 @@ static void delayed_ref_async_start(struct btrfs_work *work) } /* - * trans->sync means that when we call end_transaciton, we won't + * trans->sync means that when we call end_transaction, we won't * wait on delayed refs */ trans->sync = true; + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid > async->transid) + goto end; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); if (ret) async->error = ret; - +end: ret = btrfs_end_transaction(trans, async->root); if (ret && !async->error) async->error = ret; @@ -2870,7 +2886,7 @@ done: } int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait) + unsigned long count, u64 transid, int wait) { struct async_delayed_refs *async; int ret; @@ -2882,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->root = root->fs_info->tree_root; async->count = count; async->error = 0; + async->transid = transid; if (wait) async->sync = 1; else @@ -4296,7 +4313,7 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, * Called if we need to clear a data reservation for this inode * Normally in a error case. * - * This one will handle the per-indoe data rsv map for accurate reserved + * This one will handle the per-inode data rsv map for accurate reserved * space framework. */ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len) @@ -4967,7 +4984,7 @@ void btrfs_init_async_reclaim_work(struct work_struct *work) * @orig_bytes - the number of bytes we want * @flush - whether or not we can flush to make our reservation * - * This will reserve orgi_bytes number of bytes from the space info associated + * This will reserve orig_bytes number of bytes from the space info associated * with the block_rsv. If there is not enough space it will make an attempt to * flush out space to make room. It will do this by flushing delalloc if * possible or committing the transaction. If flush is 0 then no attempts to @@ -5572,7 +5589,7 @@ void btrfs_orphan_release_metadata(struct inode *inode) * common file/directory operations, they change two fs/file trees * and root tree, the number of items that the qgroup reserves is * different with the free space reservation. So we can not use - * the space reseravtion mechanism in start_transaction(). + * the space reservation mechanism in start_transaction(). */ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, @@ -5621,7 +5638,7 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, /** * drop_outstanding_extent - drop an outstanding extent * @inode: the inode we're dropping the extent for - * @num_bytes: the number of bytes we're relaseing. + * @num_bytes: the number of bytes we're releasing. * * This is called when we are freeing up an outstanding extent, either called * after an error or after an extent is written. This will return the number of @@ -5647,7 +5664,7 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) drop_inode_space = 1; /* - * If we have more or the same amount of outsanding extents than we have + * If we have more or the same amount of outstanding extents than we have * reserved then we need to leave the reserved extents count alone. */ if (BTRFS_I(inode)->outstanding_extents >= @@ -5661,8 +5678,8 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) } /** - * calc_csum_metadata_size - return the amount of metada space that must be - * reserved/free'd for the given bytes. + * calc_csum_metadata_size - return the amount of metadata space that must be + * reserved/freed for the given bytes. * @inode: the inode we're manipulating * @num_bytes: the number of bytes in question * @reserve: 1 if we are reserving space, 0 if we are freeing space @@ -5814,7 +5831,7 @@ out_fail: /* * This is tricky, but first we need to figure out how much we - * free'd from any free-ers that occurred during this + * freed from any free-ers that occurred during this * reservation, so we reset ->csum_bytes to the csum_bytes * before we dropped our lock, and then call the free for the * number of bytes that were freed while we were trying our @@ -5836,7 +5853,7 @@ out_fail: /* * Now reset ->csum_bytes to what it should be. If bytes is - * more than to_free then we would have free'd more space had we + * more than to_free then we would have freed more space had we * not had an artificially high ->csum_bytes, so we need to free * the remainder. If bytes is the same or less then we don't * need to do anything, the other free-ers did the correct @@ -6515,7 +6532,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ret = btrfs_discard_extent(root, start, end + 1 - start, NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); + clear_extent_dirty(unpin, start, end); unpin_extent_range(root, start, end, true); mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); @@ -7578,7 +7595,7 @@ loop: if (loop == LOOP_CACHING_NOWAIT) { /* * We want to skip the LOOP_CACHING_WAIT step if we - * don't have any unached bgs and we've alrelady done a + * don't have any uncached bgs and we've already done a * full search through. */ if (orig_have_caching_bg || !full_search) @@ -7982,7 +7999,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, /* * Mixed block groups will exclude before processing the log so we only - * need to do the exlude dance if this fs isn't mixed. + * need to do the exclude dance if this fs isn't mixed. */ if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) { ret = __exclude_logged_extent(root, ins->objectid, ins->offset); @@ -8010,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8032,13 +8050,13 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf->start + buf->len - 1, GFP_NOFS); else set_extent_new(&root->dirty_log_pages, buf->start, - buf->start + buf->len - 1, GFP_NOFS); + buf->start + buf->len - 1); } else { buf->log_index = -1; set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } @@ -8653,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; @@ -9426,7 +9445,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) u64 free_bytes = 0; int factor; - /* It's df, we don't care if it's racey */ + /* It's df, we don't care if it's racy */ if (list_empty(&sinfo->ro_bgs)) return 0; @@ -10635,14 +10654,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, - EXTENT_DIRTY, GFP_NOFS); + EXTENT_DIRTY); if (ret) { mutex_unlock(&fs_info->unused_bg_unpin_mutex); btrfs_dec_block_group_ro(root, block_group); goto end_trans; } ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, - EXTENT_DIRTY, GFP_NOFS); + EXTENT_DIRTY); if (ret) { mutex_unlock(&fs_info->unused_bg_unpin_mutex); btrfs_dec_block_group_ro(root, block_group); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2f83448d34fe..75533adef998 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -726,14 +726,6 @@ next: start = last_end + 1; if (start <= end && state && !need_resched()) goto hit_next; - goto search_again; - -out: - spin_unlock(&tree->lock); - if (prealloc) - free_extent_state(prealloc); - - return 0; search_again: if (start > end) @@ -742,6 +734,14 @@ search_again: if (gfpflags_allow_blocking(mask)) cond_resched(); goto again; + +out: + spin_unlock(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return 0; + } static void wait_on_state(struct extent_io_tree *tree, @@ -873,8 +873,14 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, bits |= EXTENT_FIRST_DELALLOC; again: if (!prealloc && gfpflags_allow_blocking(mask)) { + /* + * Don't care for allocation failure here because we might end + * up not needing the pre-allocated extent state at all, which + * is the case if we only have in the tree extent states that + * cover our input range and don't cover too any other range. + * If we end up needing a new extent state we allocate it later. + */ prealloc = alloc_extent_state(mask); - BUG_ON(!prealloc); } spin_lock(&tree->lock); @@ -1037,7 +1043,13 @@ hit_next: goto out; } - goto search_again; +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + if (gfpflags_allow_blocking(mask)) + cond_resched(); + goto again; out: spin_unlock(&tree->lock); @@ -1046,13 +1058,6 @@ out: return err; -search_again: - if (start > end) - goto out; - spin_unlock(&tree->lock); - if (gfpflags_allow_blocking(mask)) - cond_resched(); - goto again; } int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, @@ -1073,17 +1078,18 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, * @bits: the bits to set in this range * @clear_bits: the bits to clear in this range * @cached_state: state that we're going to cache - * @mask: the allocation mask * * This will go through and set bits for the given range. If any states exist * already in this range they are set with the given bit and cleared of the * clear_bits. This is only meant to be used by things that are mergeable, ie * converting from say DELALLOC to DIRTY. This is not meant to be used with * boundary bits like LOCK. + * + * All allocations are done with GFP_NOFS. */ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, unsigned clear_bits, - struct extent_state **cached_state, gfp_t mask) + struct extent_state **cached_state) { struct extent_state *state; struct extent_state *prealloc = NULL; @@ -1098,7 +1104,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, btrfs_debug_check_extent_io_range(tree, start, end); again: - if (!prealloc && gfpflags_allow_blocking(mask)) { + if (!prealloc) { /* * Best effort, don't worry if extent state allocation fails * here for the first iteration. We might have a cached state @@ -1106,7 +1112,7 @@ again: * extent state allocations are needed. We'll only know this * after locking the tree. */ - prealloc = alloc_extent_state(mask); + prealloc = alloc_extent_state(GFP_NOFS); if (!prealloc && !first_iteration) return -ENOMEM; } @@ -1263,7 +1269,13 @@ hit_next: goto out; } - goto search_again; +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + cond_resched(); + first_iteration = false; + goto again; out: spin_unlock(&tree->lock); @@ -1271,21 +1283,11 @@ out: free_extent_state(prealloc); return err; - -search_again: - if (start > end) - goto out; - spin_unlock(&tree->lock); - if (gfpflags_allow_blocking(mask)) - cond_resched(); - first_iteration = false; - goto again; } /* wrappers around set/clear extent bit */ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask, - struct extent_changeset *changeset) + unsigned bits, struct extent_changeset *changeset) { /* * We don't support EXTENT_LOCKED yet, as current changeset will @@ -1295,7 +1297,7 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, */ BUG_ON(bits & EXTENT_LOCKED); - return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, mask, + return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS, changeset); } @@ -1308,8 +1310,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, } int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask, - struct extent_changeset *changeset) + unsigned bits, struct extent_changeset *changeset) { /* * Don't support EXTENT_LOCKED case, same reason as @@ -1317,7 +1318,7 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, */ BUG_ON(bits & EXTENT_LOCKED); - return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask, + return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS, changeset); } @@ -1975,13 +1976,13 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec) set_state_failrec(failure_tree, rec->start, NULL); ret = clear_extent_bits(failure_tree, rec->start, rec->start + rec->len - 1, - EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY); if (ret) err = ret; ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, rec->start + rec->len - 1, - EXTENT_DAMAGED, GFP_NOFS); + EXTENT_DAMAGED); if (ret && !err) err = ret; @@ -2024,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, bio->bi_iter.bi_size = 0; map_length = length; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are doing the + * read repair operation. + */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_block(fs_info, WRITE, logical, &map_length, &bbio, mirror_num); if (ret) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } @@ -2036,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, dev = bbio->stripes[mirror_num-1].dev; btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } @@ -2044,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { /* try to remap that extent elsewhere? */ + btrfs_bio_counter_dec(fs_info); bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; @@ -2053,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, "read error corrected: ino %llu off %llu (dev %s sector %llu)", btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); + btrfs_bio_counter_dec(fs_info); bio_put(bio); return 0; } @@ -2232,13 +2243,12 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, /* set the bits in the private failure tree */ ret = set_extent_bits(failure_tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY); if (ret >= 0) ret = set_state_failrec(failure_tree, start, failrec); /* set the bits in the inode's tree */ if (ret >= 0) - ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, - GFP_NOFS); + ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED); if (ret < 0) { kfree(failrec); return ret; @@ -4389,8 +4399,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret < 0) { btrfs_free_path(path); return ret; + } else { + WARN_ON(!ret); + if (ret == 1) + ret = 0; } - WARN_ON(!ret); + path->slots[0]--; btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); found_type = found_key.type; @@ -4601,7 +4615,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) if (mapped) spin_unlock(&page->mapping->private_lock); - /* One for when we alloced the page */ + /* One for when we allocated the page */ put_page(page); } while (index != 0); } @@ -4714,16 +4728,16 @@ err: } struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { unsigned long len; if (!fs_info) { /* * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 + * available */ - len = 4096; + len = nodesize; } else { len = fs_info->tree_root->nodesize; } @@ -4819,7 +4833,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4827,7 +4841,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(fs_info, start); + eb = alloc_dummy_extent_buffer(fs_info, start, nodesize); if (!eb) return NULL; eb->fs_info = fs_info; @@ -4878,18 +4892,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4934,8 +4955,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, @@ -5319,6 +5342,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } +/* + * return 0 if the item is found within a page. + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, @@ -5333,7 +5361,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, PAGE_SHIFT; if (i != end_i) - return -EINVAL; + return 1; if (i == 0) { offset = start_offset; @@ -5761,7 +5789,7 @@ int try_release_extent_buffer(struct page *page) struct extent_buffer *eb; /* - * We need to make sure noboody is attaching this page to an eb right + * We need to make sure nobody is attaching this page to an eb right * now. */ spin_lock(&page->mapping->private_lock); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 981f402bf754..c0c1c4fef6ce 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -220,8 +220,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int filled, struct extent_state *cached_state); int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask, - struct extent_changeset *changeset); + unsigned bits, struct extent_changeset *changeset); int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask); @@ -240,27 +239,27 @@ static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, } static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, - u64 end, unsigned bits, gfp_t mask) + u64 end, unsigned bits) { int wake = 0; if (bits & EXTENT_LOCKED) wake = 1; - return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask); + return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, + GFP_NOFS); } int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask, - struct extent_changeset *changeset); + unsigned bits, struct extent_changeset *changeset); int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, - u64 end, unsigned bits, gfp_t mask) + u64 end, unsigned bits) { - return set_extent_bit(tree, start, end, bits, NULL, NULL, mask); + return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS); } static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -278,37 +277,38 @@ static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, } static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start, - u64 end, gfp_t mask) + u64 end) { return clear_extent_bit(tree, start, end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); + EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); } int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, unsigned clear_bits, - struct extent_state **cached_state, gfp_t mask); + struct extent_state **cached_state); static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state, gfp_t mask) + u64 end, struct extent_state **cached_state) { return set_extent_bit(tree, start, end, EXTENT_DELALLOC | EXTENT_UPTODATE, - NULL, cached_state, mask); + NULL, cached_state, GFP_NOFS); } static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state, gfp_t mask) + u64 end, struct extent_state **cached_state) { return set_extent_bit(tree, start, end, EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, - NULL, cached_state, mask); + NULL, cached_state, GFP_NOFS); } static inline int set_extent_new(struct extent_io_tree *tree, u64 start, - u64 end, gfp_t mask) + u64 end) { - return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, mask); + return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, + GFP_NOFS); } static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); @@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, u64 *end, u64 max_bytes); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 318b048eb254..e0715fcfb11e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -62,7 +62,7 @@ struct extent_map *alloc_extent_map(void) /** * free_extent_map - drop reference count of an extent_map - * @em: extent map being releasead + * @em: extent map being released * * Drops the reference out on @em by one and free the structure * if the reference count hits zero. diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 7a7d6e253cfc..62a81ee13a5f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -248,7 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, BTRFS_DATA_RELOC_TREE_OBJECTID) { set_extent_bits(io_tree, offset, offset + root->sectorsize - 1, - EXTENT_NODATASUM, GFP_NOFS); + EXTENT_NODATASUM); } else { btrfs_info(BTRFS_I(inode)->root->fs_info, "no csum found for inode %llu start %llu", diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c98805c35bab..2234e88cf674 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, reserve_bytes = round_up(write_bytes + sector_offset, root->sectorsize); - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { - /* - * For nodata cow case, no need to reserve - * data space. - */ - only_release_metadata = true; - /* - * our prealloc extent may be smaller than - * write_bytes, so scale down. - */ - num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_SIZE); - reserve_bytes = round_up(write_bytes + sector_offset, - root->sectorsize); - goto reserve_metadata; - } - ret = btrfs_check_data_free_space(inode, pos, write_bytes); - if (ret < 0) - break; + if (ret < 0) { + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) && + check_can_nocow(inode, pos, &write_bytes) > 0) { + /* + * For nodata cow case, no need to reserve + * data space. + */ + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = DIV_ROUND_UP(write_bytes + offset, + PAGE_SIZE); + reserve_bytes = round_up(write_bytes + + sector_offset, + root->sectorsize); + } else { + break; + } + } -reserve_metadata: ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); if (ret) { if (!only_release_metadata) @@ -1596,6 +1596,13 @@ again: copied = btrfs_copy_from_user(pos, write_bytes, pages, i); + num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + reserve_bytes); + dirty_sectors = round_up(copied + sector_offset, + root->sectorsize); + dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + dirty_sectors); + /* * if we have trouble faulting in the pages, fall * back to one page at a time @@ -1605,6 +1612,7 @@ again: if (copied == 0) { force_page_uptodate = true; + dirty_sectors = 0; dirty_pages = 0; } else { force_page_uptodate = false; @@ -1615,20 +1623,19 @@ again: /* * If we had a short copy we need to release the excess delaloc * bytes we reserved. We need to increment outstanding_extents - * because btrfs_delalloc_release_space will decrement it, but + * because btrfs_delalloc_release_space and + * btrfs_delalloc_release_metadata will decrement it, but * we still have an outstanding extent for the chunk we actually * managed to copy. */ - num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, - reserve_bytes); - dirty_sectors = round_up(copied + sector_offset, - root->sectorsize); - dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, - dirty_sectors); - if (num_sectors > dirty_sectors) { - release_bytes = (write_bytes - copied) - & ~((u64)root->sectorsize - 1); + /* + * we round down because we don't want to count + * any partial blocks actually sent through the + * IO machines + */ + release_bytes = round_down(release_bytes - copied, + root->sectorsize); if (copied > 0) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; @@ -2022,7 +2029,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed)) { /* - * We'v had everything committed since the last time we were + * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever * reason, it's no longer relevant. */ @@ -2370,7 +2377,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) /* Check the aligned pages after the first unaligned page, * if offset != orig_start, which means the first unaligned page - * including serveral following pages are already in holes, + * including several following pages are already in holes, * the extra check can be skipped */ if (offset == orig_start) { /* after truncate page, check hole again */ diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5e6062c26129..69d270f6602c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -29,7 +29,7 @@ #include "inode-map.h" #include "volumes.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { @@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; - u32 bytes_per_bitmap; + u64 bytes_per_bitmap; bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; bitmap_start = offset - ctl->start; - bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); + bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; bitmap_start += ctl->start; @@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; - u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; - u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); + u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); - max_bitmaps = max_t(u32, max_bitmaps, 1); + max_bitmaps = max_t(u64, max_bitmaps, 1); ASSERT(ctl->total_bitmaps <= max_bitmaps); @@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. */ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; @@ -1983,7 +1983,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, /* * If this block group has some small extents we don't want to * use up all of our free slots in the cache with them, we want - * to reserve them to larger extents, however if we have plent + * to reserve them to larger extents, however if we have plenty * of cache left then go ahead an dadd them, no sense in adding * the overhead of a bitmap if we don't have to. */ @@ -3662,7 +3662,7 @@ have_info: if (tmp->offset + tmp->bytes < offset) break; if (offset + bytes < tmp->offset) { - n = rb_prev(&info->offset_index); + n = rb_prev(&tmp->offset_index); continue; } info = tmp; @@ -3676,7 +3676,7 @@ have_info: if (offset + bytes < tmp->offset) break; if (tmp->offset + tmp->bytes < offset) { - n = rb_next(&info->offset_index); + n = rb_next(&tmp->offset_index); continue; } info = tmp; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 33178c490ace..3af651c2bbc7 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -123,7 +123,7 @@ int btrfs_return_cluster_to_free_space( int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, u64 *trimmed, u64 start, u64 end, u64 minlen); -/* Support functions for runnint our sanity tests */ +/* Support functions for running our sanity tests */ #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS int test_add_free_space_entry(struct btrfs_block_group_cache *cache, u64 offset, u64 bytes, bool bitmap); diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index aae520b2aee5..a97fdc156a03 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -24,6 +24,11 @@ int __init btrfs_hash_init(void) return PTR_ERR_OR_ZERO(tfm); } +const char* btrfs_crc32c_impl(void) +{ + return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)); +} + void btrfs_hash_exit(void) { crypto_free_shash(tfm); diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 118a2316e5d3..c3a2ec554361 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -22,6 +22,7 @@ int __init btrfs_hash_init(void); void btrfs_hash_exit(void); +const char* btrfs_crc32c_impl(void); u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91419ef79b00..4421954720b8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -455,7 +455,7 @@ again: /* * skip compression for a small file range(<=blocksize) that - * isn't an inline extent, since it dosen't save disk space at all. + * isn't an inline extent, since it doesn't save disk space at all. */ if (total_compressed <= blocksize && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) @@ -1978,7 +1978,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, { WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - cached_state, GFP_NOFS); + cached_state); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -3119,8 +3119,7 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { - clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, - GFP_NOFS); + clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM); return 0; } @@ -3272,7 +3271,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + ASSERT(!ret); + if (ret) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + if (insert) + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + return ret; + } } /* insert an orphan item to track this unlinked/truncated file */ @@ -3722,7 +3730,7 @@ cache_index: * and doesn't have an inode ref with the name "bar" anymore. * * Setting last_unlink_trans to last_trans is a pessimistic approach, - * but it guarantees correctness at the expense of ocassional full + * but it guarantees correctness at the expense of occasional full * transaction commits on fsync if our inode is a directory, or if our * inode is not a directory, logging its parent unnecessarily. */ @@ -4550,6 +4558,7 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, + trans->transid, trans->delayed_ref_updates * 2, 0); if (be_nice) { if (truncate_space_check(trans, root, @@ -4978,7 +4987,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * be instantly completed which will give us extents that need * to be truncated. If we fail to get an orphan inode down we * could have left over extents that were never meant to live, - * so we need to garuntee from this point on that everything + * so we need to guarantee from this point on that everything * will be consistent. */ ret = btrfs_orphan_add(trans, inode); @@ -5248,7 +5257,7 @@ void btrfs_evict_inode(struct inode *inode) } /* - * We can't just steal from the global reserve, we need tomake + * We can't just steal from the global reserve, we need to make * sure there is room to do it, if not we need to commit and try * again. */ @@ -5749,6 +5758,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ bool emitted; + bool put = false; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5766,7 +5776,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - btrfs_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, &ins_list, + &del_list); } key.type = key_type; @@ -5913,8 +5924,8 @@ next: nopos: ret = 0; err: - if (key_type == BTRFS_DIR_INDEX_KEY) - btrfs_put_delayed_items(&ins_list, &del_list); + if (put) + btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); btrfs_free_path(path); return ret; } @@ -6980,7 +6991,18 @@ insert: * existing will always be non-NULL, since there must be * extent causing the -EEXIST. */ - if (start >= extent_map_end(existing) || + if (existing->start == em->start && + extent_map_end(existing) == extent_map_end(em) && + em->block_start == existing->block_start) { + /* + * these two extents are the same, it happens + * with inlines especially + */ + free_extent_map(em); + em = existing; + err = 0; + + } else if (start >= extent_map_end(existing) || start <= existing->start) { /* * The existing extent map is the one nearest to @@ -7433,7 +7455,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, cached_state); /* * We're concerned with the entire range that we're going to be - * doing DIO to, so we need to make sure theres no ordered + * doing DIO to, so we need to make sure there's no ordered * extents in this range. */ ordered = btrfs_lookup_ordered_range(inode, lockstart, @@ -7595,7 +7617,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (current->journal_info) { /* * Need to pull our outstanding extents and set journal_info to NULL so - * that anything that needs to check if there's a transction doesn't get + * that anything that needs to check if there's a transaction doesn't get * confused. */ dio_data = current->journal_info; @@ -7628,7 +7650,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * decompress it, so there will be buffering required no matter what we * do, so go ahead and fallback to buffered. * - * We return -ENOTBLK because thats what makes DIO go ahead and go back + * We return -ENOTBLK because that's what makes DIO go ahead and go back * to buffered IO. Don't blame me, this is the price we pay for using * the generic code. */ @@ -9041,7 +9063,7 @@ static int btrfs_truncate(struct inode *inode) return ret; /* - * Yes ladies and gentelment, this is indeed ugly. The fact is we have + * Yes ladies and gentlemen, this is indeed ugly. The fact is we have * 3 things going on here * * 1) We need to reserve space for our orphan item and the space to @@ -9055,15 +9077,15 @@ static int btrfs_truncate(struct inode *inode) * space reserved in case it uses space during the truncate (thank you * very much snapshotting). * - * And we need these to all be seperate. The fact is we can use alot of + * And we need these to all be separate. The fact is we can use a lot of * space doing the truncate, and we have no earthly idea how much space - * we will use, so we need the truncate reservation to be seperate so it + * we will use, so we need the truncate reservation to be separate so it * doesn't end up using space reserved for updating the inode or * removing the orphan item. We also need to be able to stop the * transaction and start a new one, which means we need to be able to * update the inode several times, and we have no idea of knowing how * many times that will be, so we can't just reserve 1 item for the - * entirety of the opration, so that has to be done seperately as well. + * entirety of the operation, so that has to be done separately as well. * Then there is the orphan item, which does indeed need to be held on * to for the whole operation, and we need nobody to touch this reserved * space except the orphan code. @@ -10515,7 +10537,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = btrfs_real_readdir, + .iterate_shared = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4e700694b741..05173563e4a6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -296,7 +296,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } } else { /* - * Revert back under same assuptions as above + * Revert back under same assumptions as above */ if (S_ISREG(mode)) { if (inode->i_size == 0) @@ -465,7 +465,7 @@ static noinline int create_subvol(struct inode *dir, /* * Don't create subvolume whose level is not zero. Or qgroup will be - * screwed up since it assume subvolme qgroup's level to be 0. + * screwed up since it assumes subvolume qgroup's level to be 0. */ if (btrfs_qgroup_level(objectid)) { ret = -ENOSPC; @@ -780,7 +780,7 @@ free_pending: * a. be owner of dir, or * b. be owner of victim, or * c. have CAP_FOWNER capability - * 6. If the victim is append-only or immutable we can't do antyhing with + * 6. If the victim is append-only or immutable we can't do anything with * links pointing to it. * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. @@ -846,11 +846,9 @@ static noinline int btrfs_mksubvol(struct path *parent, struct dentry *dentry; int error; - inode_lock_nested(dir, I_MUTEX_PARENT); - // XXX: should've been - // mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); - // if (error == -EINTR) - // return error; + error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); + if (error == -EINTR) + return error; dentry = lookup_one_len(name, parent->dentry, namelen); error = PTR_ERR(dentry); @@ -1239,7 +1237,7 @@ again: set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, - &cached_state, GFP_NOFS); + &cached_state); unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, &cached_state, @@ -2377,11 +2375,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out; - inode_lock_nested(dir, I_MUTEX_PARENT); - // XXX: should've been - // err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); - // if (err == -EINTR) - // goto out_drop_write; + err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); + if (err == -EINTR) + goto out_drop_write; dentry = lookup_one_len(vol_args->name, parent, namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -2571,7 +2567,7 @@ out_dput: dput(dentry); out_unlock_dir: inode_unlock(dir); -//out_drop_write: +out_drop_write: mnt_drop_write_file(file); out: kfree(vol_args); @@ -4654,7 +4650,7 @@ again: } /* - * mut. excl. ops lock is locked. Three possibilites: + * mut. excl. ops lock is locked. Three possibilities: * (1) some other op is running * (2) balance is running * (3) balance is paused -- special case (think resume) @@ -5571,7 +5567,7 @@ long btrfs_ioctl(struct file *file, unsigned int ret = btrfs_sync_fs(file_inode(file)->i_sb, 1); /* * The transaction thread may want to do more work, - * namely it pokes the cleaner ktread that will start + * namely it pokes the cleaner kthread that will start * processing uncleaned subvols. */ wake_up_process(root->fs_info->transaction_kthread); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 559170464d7c..aca8264f4a49 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, return count; } -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len) { struct btrfs_root *root; struct list_head splice; int done; + int total_done = 0; INIT_LIST_HEAD(&splice); @@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, done = btrfs_wait_ordered_extents(root, nr, range_start, range_len); btrfs_put_fs_root(root); + total_done += done; spin_lock(&fs_info->ordered_root_lock); if (nr != -1) { @@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); mutex_unlock(&fs_info->ordered_operations_mutex); + + return total_done; } /* @@ -964,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; int ret = 1; + u64 orig_offset = offset; spin_lock_irq(&tree->lock); if (ordered) { @@ -979,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, /* truncate file */ if (disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = i_size; + BTRFS_I(inode)->disk_i_size = orig_offset; ret = 0; goto out; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ef12623d65c..451507776ff5 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -58,7 +58,7 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ -#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ +#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to preallocated extent */ #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ @@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum, int len); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, const u64 range_start, const u64 range_len); -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len); void btrfs_get_logged_extents(struct inode *inode, struct list_head *logged_list, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9e119552ed32..9d4c05b14f6e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -85,7 +85,7 @@ struct btrfs_qgroup { /* * temp variables for accounting operations - * Refer to qgroup_shared_accouting() for details. + * Refer to qgroup_shared_accounting() for details. */ u64 old_refcnt; u64 new_refcnt; @@ -499,7 +499,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) } /* * we call btrfs_free_qgroup_config() when umounting - * filesystem and disabling quota, so we set qgroup_ulit + * filesystem and disabling quota, so we set qgroup_ulist * to be null here to avoid double free. */ ulist_free(fs_info->qgroup_ulist); @@ -1036,7 +1036,7 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info, /* * The easy accounting, if we are adding/removing the only ref for an extent - * then this qgroup and all of the parent qgroups get their refrence and + * then this qgroup and all of the parent qgroups get their reference and * exclusive counts adjusted. * * Caller should hold fs_info->qgroup_lock. @@ -1436,7 +1436,7 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, /* * No need to do lock, since this function will only be called in - * btrfs_commmit_transaction(). + * btrfs_commit_transaction(). */ node = rb_first(&delayed_refs->dirty_extent_root); while (node) { @@ -1557,7 +1557,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, * A: cur_old_roots < nr_old_roots (not exclusive before) * !A: cur_old_roots == nr_old_roots (possible exclusive before) * B: cur_new_roots < nr_new_roots (not exclusive now) - * !B: cur_new_roots == nr_new_roots (possible exclsuive now) + * !B: cur_new_roots == nr_new_roots (possible exclusive now) * * Results: * +: Possible sharing -> exclusive -: Possible exclusive -> sharing @@ -1851,7 +1851,7 @@ out: } /* - * Copy the acounting information between qgroups. This is necessary + * Copy the accounting information between qgroups. This is necessary * when a snapshot or a subvolume is created. Throwing an error will * cause a transaction abort so we take extra care here to only error * when a readonly fs is a reasonable outcome. @@ -2340,7 +2340,7 @@ out: mutex_unlock(&fs_info->qgroup_rescan_lock); /* - * only update status, since the previous part has alreay updated the + * only update status, since the previous part has already updated the * qgroup info. */ trans = btrfs_start_transaction(fs_info->quota_root, 1); @@ -2542,8 +2542,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) changeset.bytes_changed = 0; changeset.range_changed = ulist_alloc(GFP_NOFS); ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, - start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS, - &changeset); + start + len -1, EXTENT_QGROUP_RESERVED, &changeset); trace_btrfs_qgroup_reserve_data(inode, start, len, changeset.bytes_changed, QGROUP_RESERVE); @@ -2580,8 +2579,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, return -ENOMEM; ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, - start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS, - &changeset); + start + len -1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) goto out; @@ -2672,7 +2670,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) } /* - * Check qgroup reserved space leaking, normally at destory inode + * Check qgroup reserved space leaking, normally at destroy inode * time */ void btrfs_qgroup_check_reserved_leak(struct inode *inode) @@ -2688,7 +2686,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode) return; ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, - EXTENT_QGROUP_RESERVED, GFP_NOFS, &changeset); + EXTENT_QGROUP_RESERVED, &changeset); WARN_ON(ret < 0); if (WARN_ON(changeset.bytes_changed)) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0b7792e02dd5..f8b6d411a034 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -576,7 +576,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, * we can't merge with cached rbios, since the * idea is that when we merge the destination * rbio is going to run our IO for us. We can - * steal from cached rbio's though, other functions + * steal from cached rbios though, other functions * handle that. */ if (test_bit(RBIO_CACHE_BIT, &last->flags) || @@ -2368,7 +2368,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } - /* Check scrubbing pairty and repair it */ + /* Check scrubbing parity and repair it */ p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); parity = kmap(p); if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) @@ -2493,7 +2493,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) /* * Here means we got one corrupted data stripe and one * corrupted parity on RAID6, if the corrupted parity - * is scrubbing parity, luckly, use the other one to repair + * is scrubbing parity, luckily, use the other one to repair * the data, or we can not repair the data stripe. */ if (failp != rbio->scrubp) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 298631eaee78..8428db7cd88f 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) do { enqueued = 0; + mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (atomic_read(&device->reada_in_flight) < MAX_IN_FLIGHT) enqueued += reada_start_machine_dev(fs_info, device); } + mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 1cfd35cfac76..0477dca154ed 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -668,8 +668,8 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, * roots of b-trees that reference the tree block. * * the basic idea of this function is check backrefs of a given block - * to find upper level blocks that refernece the block, and then check - * bakcrefs of these upper level blocks recursively. the recursion stop + * to find upper level blocks that reference the block, and then check + * backrefs of these upper level blocks recursively. the recursion stop * when tree root is reached or backrefs for the block is cached. * * NOTE: if we find backrefs for a block are cached, we know backrefs @@ -1160,7 +1160,7 @@ out: if (!RB_EMPTY_NODE(&upper->rb_node)) continue; - /* Add this guy's upper edges to the list to proces */ + /* Add this guy's upper edges to the list to process */ list_for_each_entry(edge, &upper->upper, list[LOWER]) list_add_tail(&edge->list[UPPER], &list); if (list_empty(&upper->upper)) @@ -2396,7 +2396,7 @@ again: } /* - * we keep the old last snapshod transid in rtranid when we + * we keep the old last snapshot transid in rtranid when we * created the relocation tree. */ last_snap = btrfs_root_rtransid(&reloc_root->root_item); @@ -2616,7 +2616,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, * only one thread can access block_rsv at this point, * so we don't need hold lock to protect block_rsv. * we expand more reservation size here to allow enough - * space for relocation and we will return eailer in + * space for relocation and we will return earlier in * enospc case. */ rc->block_rsv->size = tmp + rc->extent_root->nodesize * @@ -2814,7 +2814,7 @@ static void mark_block_processed(struct reloc_control *rc, u64 bytenr, u32 blocksize) { set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, - EXTENT_DIRTY, GFP_NOFS); + EXTENT_DIRTY); } static void __mark_block_processed(struct reloc_control *rc, @@ -3182,7 +3182,7 @@ static int relocate_file_extent_cluster(struct inode *inode, page_start + offset == cluster->boundary[nr]) { set_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_BOUNDARY, GFP_NOFS); + EXTENT_BOUNDARY); nr++; } @@ -4059,8 +4059,7 @@ restart: } btrfs_release_path(path); - clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, - GFP_NOFS); + clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY); if (trans) { btrfs_end_transaction_throttle(trans, rc->extent_root); @@ -4591,7 +4590,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, /* * called before creating snapshot. it calculates metadata reservation - * requried for relocating tree blocks in the snapshot + * required for relocating tree blocks in the snapshot */ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, u64 *bytes_to_reserve) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index b2b14e7115f1..f1c30861d062 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -71,9 +71,9 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot, * search_key: the key to search * path: the path we search * root_item: the root item of the tree we look for - * root_key: the reak key of the tree we look for + * root_key: the root key of the tree we look for * - * If ->offset of 'seach_key' is -1ULL, it means we are not sure the offset + * If ->offset of 'search_key' is -1ULL, it means we are not sure the offset * of the search key, just lookup the root with the highest offset for a * given objectid. * diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fa35cdc46494..70427ef66b04 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -745,7 +745,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) * sure we read the bad mirror. */ ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED, GFP_NOFS); + EXTENT_DAMAGED); if (ret) { /* set_extent_bits should give proper error */ WARN_ON(ret > 0); @@ -763,7 +763,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) end, EXTENT_DAMAGED, 0, NULL); if (!corrected) clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED, GFP_NOFS); + EXTENT_DAMAGED); } out: @@ -1044,7 +1044,7 @@ nodatasum_case: /* * !is_metadata and !have_csum, this means that the data - * might not be COW'ed, that it might be modified + * might not be COWed, that it might be modified * concurrently. The general strategy to work on the * commit root does not help in the case when COW is not * used. @@ -1125,7 +1125,7 @@ nodatasum_case: * the 2nd page of mirror #1 faces I/O errors, and the 2nd page * of mirror #2 is readable but the final checksum test fails, * then the 2nd page of mirror #3 could be tried, whether now - * the final checksum succeedes. But this would be a rare + * the final checksum succeeds. But this would be a rare * exception and is therefore not implemented. At least it is * avoided that the good copy is overwritten. * A more useful improvement would be to pick the sectors @@ -2181,7 +2181,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; u64 length = sblock->page_count * PAGE_SIZE; u64 logical = sblock->pagev[0]->logical; - struct btrfs_bio *bbio; + struct btrfs_bio *bbio = NULL; struct bio *bio; struct btrfs_raid_bio *rbio; int ret; @@ -2982,6 +2982,7 @@ again: extent_len); mapped_length = extent_len; + bbio = NULL; ret = btrfs_map_block(fs_info, READ, extent_logical, &mapped_length, &bbio, 0); if (!ret) { @@ -3581,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); + if (!ret && is_dev_replace) { + /* + * If we are doing a device replace wait for any tasks + * that started dellaloc right before we set the block + * group to RO mode, as they might have just allocated + * an extent from it or decided they could do a nocow + * write. And if any such tasks did that, wait for their + * ordered extents to complete and then commit the + * current transaction, so that we can later see the new + * extent items in the extent tree - the ordered extents + * create delayed data references (for cow writes) when + * they complete, which will be run and insert the + * corresponding extent items into the extent tree when + * we commit the transaction they used when running + * inode.c:btrfs_finish_ordered_io(). We later use + * the commit root of the extent tree to find extents + * to copy from the srcdev into the tgtdev, and we don't + * want to miss any new extents. + */ + btrfs_wait_block_group_reservations(cache); + btrfs_wait_nocow_writers(cache); + ret = btrfs_wait_ordered_roots(fs_info, -1, + cache->key.objectid, + cache->key.offset); + if (ret > 0) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + ret = PTR_ERR(trans); + else + ret = btrfs_commit_transaction(trans, + root); + if (ret) { + scrub_pause_off(fs_info); + btrfs_put_block_group(cache); + break; + } + } + } scrub_pause_off(fs_info); if (ret == 0) { @@ -3601,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, break; } + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, found_key.offset, cache, is_dev_replace); @@ -3639,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); + dev_replace->cursor_left = dev_replace->cursor_right; + dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); + if (ro_set) btrfs_dec_block_group_ro(root, cache); @@ -3676,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ret = -ENOMEM; break; } - - dev_replace->cursor_left = dev_replace->cursor_right; - dev_replace->item_needs_writeback = 1; skip: key.offset = found_key.offset + length; btrfs_release_path(path); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6a8c86074aa4..b71dd298385c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1831,7 +1831,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, /* * If we have a parent root we need to verify that the parent dir was - * not delted and then re-created, if it was then we have no overwrite + * not deleted and then re-created, if it was then we have no overwrite * and we can just unlink this entry. */ if (sctx->parent_root) { @@ -4192,9 +4192,9 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, return -ENOMEM; /* - * This hack is needed because empty acl's are stored as zero byte + * This hack is needed because empty acls are stored as zero byte * data in xattrs. Problem with that is, that receiving these zero byte - * acl's will fail later. To fix this, we send a dummy acl list that + * acls will fail later. To fix this, we send a dummy acl list that * only contains the version number and no entries. */ if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index e05619f241be..875c757e73e2 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -36,7 +36,7 @@ static inline void put_unaligned_le8(u8 val, void *p) * * The end result is that anyone who #includes ctree.h gets a * declaration for the btrfs_set_foo functions and btrfs_foo functions, - * which are wappers of btrfs_set_token_#bits functions and + * which are wrappers of btrfs_set_token_#bits functions and * btrfs_get_token_#bits functions, which are defined in this file. * * These setget functions do all the extent_buffer related mapping diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bf71071ab6f6..60e7179ed4b7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -112,7 +112,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) * Note that a running device replace operation is not * canceled here although there is no way to update * the progress. It would add the risk of a deadlock, - * therefore the canceling is ommited. The only penalty + * therefore the canceling is omitted. The only penalty * is that some I/O remains active until the procedure * completes. The next time when the filesystem is * mounted writeable again, the device replace @@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); @@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } } sb->s_flags &= ~MS_RDONLY; + + fs_info->open = 1; } out: wake_up_process(fs_info->transaction_kthread); @@ -1877,7 +1879,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) int ret; /* - * We aren't under the device list lock, so this is racey-ish, but good + * We aren't under the device list lock, so this is racy-ish, but good * enough for our purposes. */ nr_devices = fs_info->fs_devices->open_devices; @@ -1896,7 +1898,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) if (!devices_info) return -ENOMEM; - /* calc min stripe number for data space alloction */ + /* calc min stripe number for data space allocation */ type = btrfs_get_alloc_profile(root, 1); if (type & BTRFS_BLOCK_GROUP_RAID0) { min_stripes = 2; @@ -1932,7 +1934,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) avail_space *= BTRFS_STRIPE_LEN; /* - * In order to avoid overwritting the superblock on the drive, + * In order to avoid overwriting the superblock on the drive, * btrfs starts at an offset of at least 1MB when doing chunk * allocation. */ @@ -2303,7 +2305,7 @@ static void btrfs_interface_exit(void) static void btrfs_print_mod_info(void) { - printk(KERN_INFO "Btrfs loaded" + printk(KERN_INFO "Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif @@ -2313,33 +2315,48 @@ static void btrfs_print_mod_info(void) #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY ", integrity-checker=on" #endif - "\n"); + "\n", + btrfs_crc32c_impl()); } static int btrfs_run_sanity_tests(void) { - int ret; - + int ret, i; + u32 sectorsize, nodesize; + u32 test_sectorsize[] = { + PAGE_SIZE, + }; ret = btrfs_init_test_fs(); if (ret) return ret; - - ret = btrfs_test_free_space_cache(); - if (ret) - goto out; - ret = btrfs_test_extent_buffer_operations(); - if (ret) - goto out; - ret = btrfs_test_extent_io(); - if (ret) - goto out; - ret = btrfs_test_inodes(); - if (ret) - goto out; - ret = btrfs_test_qgroups(); - if (ret) - goto out; - ret = btrfs_test_free_space_tree(); + for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) { + sectorsize = test_sectorsize[i]; + for (nodesize = sectorsize; + nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE; + nodesize <<= 1) { + pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n", + sectorsize, nodesize); + ret = btrfs_test_free_space_cache(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_buffer_operations(sectorsize, + nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_io(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_inodes(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_qgroups(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(sectorsize, nodesize); + if (ret) + goto out; + } + } out: btrfs_destroy_test_fs(); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index f54bf450bad3..02223f3f78f4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -68,7 +68,7 @@ int btrfs_init_test_fs(void) if (IS_ERR(test_mnt)) { printk(KERN_ERR "btrfs: cannot mount test file system\n"); unregister_filesystem(&test_type); - return ret; + return PTR_ERR(test_mnt); } return 0; } @@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root) } struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length) +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize) { struct btrfs_block_group_cache *cache; @@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length) cache->key.objectid = 0; cache->key.offset = length; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; - cache->full_stripe_len = 4096; + cache->sectorsize = sectorsize; + cache->full_stripe_len = sectorsize; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 054b8c73c951..66fb6b701eb7 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -26,27 +26,28 @@ struct btrfs_root; struct btrfs_trans_handle; -int btrfs_test_free_space_cache(void); -int btrfs_test_extent_buffer_operations(void); -int btrfs_test_extent_io(void); -int btrfs_test_inodes(void); -int btrfs_test_qgroups(void); -int btrfs_test_free_space_tree(void); +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); +int btrfs_test_inodes(u32 sectorsize, u32 nodesize); +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); int btrfs_init_test_fs(void); void btrfs_destroy_test_fs(void); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); void btrfs_free_dummy_root(struct btrfs_root *root); struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length); +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize); void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache); void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans); #else -static inline int btrfs_test_free_space_cache(void) +static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_extent_buffer_operations(void) +static inline int btrfs_test_extent_buffer_operations(u32 sectorsize, + u32 nodesize) { return 0; } @@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void) static inline void btrfs_destroy_test_fs(void) { } -static inline int btrfs_test_extent_io(void) +static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_inodes(void) +static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_qgroups(void) +static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_free_space_tree(void) +static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { return 0; } diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index f51963a8f929..4f8cbd1ec5ee 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -22,7 +22,7 @@ #include "../extent_io.h" #include "../disk-io.h" -static int test_btrfs_split_item(void) +static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) { struct btrfs_path *path; struct btrfs_root *root; @@ -40,7 +40,7 @@ static int test_btrfs_split_item(void) test_msg("Running btrfs_split_item tests\n"); - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Could not allocate root\n"); return PTR_ERR(root); @@ -53,7 +53,8 @@ static int test_btrfs_split_item(void) return -ENOMEM; } - path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); + path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize, + nodesize); if (!eb) { test_msg("Could not allocate dummy buffer\n"); ret = -ENOMEM; @@ -222,8 +223,8 @@ out: return ret; } -int btrfs_test_extent_buffer_operations(void) +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize) { - test_msg("Running extent buffer operation tests"); - return test_btrfs_split_item(); + test_msg("Running extent buffer operation tests\n"); + return test_btrfs_split_item(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 70948b13bc81..d19ab0317283 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/sizes.h> #include "btrfs-tests.h" +#include "../ctree.h" #include "../extent_io.h" #define PROCESS_UNLOCK (1 << 0) @@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, return count; } -static int test_find_delalloc(void) +static int test_find_delalloc(u32 sectorsize) { struct inode *inode; struct extent_io_tree tmp; @@ -113,7 +114,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -122,9 +123,9 @@ static int test_find_delalloc(void) test_msg("Should have found at least one delalloc\n"); goto out_bits; } - if (start != 0 || end != 4095) { - test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", - start, end); + if (start != 0 || end != (sectorsize - 1)) { + test_msg("Expected start 0 end %u, got start %llu end %llu\n", + sectorsize - 1, start, end); goto out_bits; } unlock_extent(&tmp, start, end); @@ -144,7 +145,7 @@ static int test_find_delalloc(void) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -172,11 +173,11 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - test_start = max_bytes + 4096; + test_start = max_bytes + sectorsize; locked_page = find_lock_page(inode->i_mapping, test_start >> PAGE_SHIFT); if (!locked_page) { - test_msg("Could'nt find the locked page\n"); + test_msg("Couldn't find the locked page\n"); goto out_bits; } start = test_start; @@ -199,7 +200,7 @@ static int test_find_delalloc(void) * * We are re-using our test_start from above since it works out well. */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL); + set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -262,7 +263,7 @@ static int test_find_delalloc(void) } ret = 0; out_bits: - clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL); + clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1); out: if (locked_page) put_page(locked_page); @@ -272,6 +273,16 @@ out: return ret; } +/** + * test_bit_in_byte - Determine whether a bit is set in a byte + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit_in_byte(int nr, const u8 *addr) +{ + return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1))); +} + static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, unsigned long len) { @@ -298,25 +309,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return -EINVAL; } - bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Setting straddling pages failed\n"); - return -EINVAL; - } + /* Straddling pages test */ + if (len > PAGE_SIZE) { + bitmap_set(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Setting straddling pages failed\n"); + return -EINVAL; + } - bitmap_set(bitmap, 0, len * BITS_PER_BYTE); - bitmap_clear(bitmap, - (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Clearing straddling pages failed\n"); - return -EINVAL; + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + bitmap_clear(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Clearing straddling pages failed\n"); + return -EINVAL; + } } /* @@ -333,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, for (i = 0; i < len * BITS_PER_BYTE; i++) { int bit, bit1; - bit = !!test_bit(i, bitmap); + bit = !!test_bit_in_byte(i, (u8 *)bitmap); bit1 = !!extent_buffer_test_bit(eb, 0, i); if (bit1 != bit) { test_msg("Testing bit pattern failed\n"); @@ -351,15 +366,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return 0; } -static int test_eb_bitmaps(void) +static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) { - unsigned long len = PAGE_SIZE * 4; + unsigned long len; unsigned long *bitmap; struct extent_buffer *eb; int ret; test_msg("Running extent buffer bitmap tests\n"); + /* + * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than + * BTRFS_MAX_METADATA_BLOCKSIZE. + */ + len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE) + ? sectorsize * 4 : sectorsize; + bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { test_msg("Couldn't allocate test bitmap\n"); @@ -379,7 +401,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. */ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); @@ -393,17 +415,17 @@ out: return ret; } -int btrfs_test_extent_io(void) +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { int ret; test_msg("Running extent I/O tests\n"); - ret = test_find_delalloc(); + ret = test_find_delalloc(sectorsize); if (ret) goto out; - ret = test_eb_bitmaps(); + ret = test_eb_bitmaps(sectorsize, nodesize); out: test_msg("Extent I/O tests finished\n"); return ret; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 514247515312..3956bb2ff84c 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,10 +22,10 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) /* - * This test just does basic sanity checking, making sure we can add an exten + * This test just does basic sanity checking, making sure we can add an extent * entry and remove space from either end and the middle, and make sure we can * remove space that covers adjacent extent entries. */ @@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache) return 0; } -static int test_bitmaps(struct btrfs_block_group_cache *cache) +static int test_bitmaps(struct btrfs_block_group_cache *cache, + u32 sectorsize) { u64 next_bitmap_offset; int ret; @@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) * The first bitmap we have starts at offset 0 so the next one is just * at the end of the first bitmap. */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); /* Test a bit straddling two bitmaps */ ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M, @@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) } /* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache, + u32 sectorsize) { - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); int ret; test_msg("Running bitmap and extent tests\n"); @@ -396,11 +398,13 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache) * wasn't optimal as they could be spread all over the block group while under * concurrency (extra overhead and fragmentation). * - * This stealing approach is benefical, since we always prefer to allocate from - * extent entries, both for clustered and non-clustered allocation requests. + * This stealing approach is beneficial, since we always prefer to allocate + * from extent entries, both for clustered and non-clustered allocation + * requests. */ static int -test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) +test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache, + u32 sectorsize) { int ret; u64 offset; @@ -538,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096); + ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -596,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) { - test_msg("Cache free space is not 1Mb + 4Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", sectorsize); return -EINVAL; } @@ -610,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is a sectorsize free space region in a bitmap. + * Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 4096) { - test_msg("Cache free space is not 4Kb\n"); + if (cache->free_space_ctl->free_space != sectorsize) { + test_msg("Cache free space is not %u\n", sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 4096, 0, + 0, sectorsize, 0, &max_extent_size); if (offset != (SZ_128M + SZ_16M)) { - test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", - offset); + test_msg("Failed to allocate %u, returned offset : %llu\n", + sectorsize, offset); return -EINVAL; } @@ -732,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_32M, 8192); + ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -756,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Confirm that our extent entry didn't stole all free space from the - * bitmap, because of the small 8Kb free space region. + * bitmap, because of the small 2 * sectorsize free space region. */ ret = check_num_extents_and_bitmaps(cache, 2, 1); if (ret) @@ -782,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) { - test_msg("Cache free space is not 1Mb + 8Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize); return -EINVAL; } @@ -795,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 8Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is 2 * sectorsize free space region + * in a bitmap. Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 8192) { - test_msg("Cache free space is not 8Kb\n"); + if (cache->free_space_ctl->free_space != 2 * sectorsize) { + test_msg("Cache free space is not %u\n", 2 * sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 8192, 0, + 0, 2 * sectorsize, 0, &max_extent_size); if (offset != SZ_32M) { - test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", + test_msg("Failed to allocate %u, offset: %llu\n", + 2 * sectorsize, offset); return -EINVAL; } @@ -824,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return 0; } -int btrfs_test_free_space_cache(void) +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { struct btrfs_block_group_cache *cache; struct btrfs_root *root = NULL; @@ -832,13 +843,19 @@ int btrfs_test_free_space_cache(void) test_msg("Running btrfs free space cache tests\n"); - cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024); + /* + * For ppc64 (with 64k page size), bytes per bitmap might be + * larger than 1G. To make bitmap test available in ppc64, + * alloc dummy block group whose size cross bitmaps. + */ + cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize + + PAGE_SIZE, sectorsize); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out; @@ -854,14 +871,14 @@ int btrfs_test_free_space_cache(void) ret = test_extents(cache); if (ret) goto out; - ret = test_bitmaps(cache); + ret = test_bitmaps(cache, sectorsize); if (ret) goto out; - ret = test_bitmaps_and_extents(cache); + ret = test_bitmaps_and_extents(cache, sectorsize); if (ret) goto out; - ret = test_steal_space_from_bitmap_to_extent(cache); + ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize); out: btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 7cea4462acd5..aac507085ab0 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../disk-io.h" @@ -30,7 +31,7 @@ struct free_space_extent { * The test cases align their operations to this in order to hit some of the * edge cases in the bitmap code. */ -#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096) +#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE) static int __check_free_space_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *, struct btrfs_block_group_cache *, struct btrfs_path *); -static int run_test(test_func_t test_func, int bitmaps) +static int run_test(test_func_t test_func, int bitmaps, + u32 sectorsize, u32 nodesize) { struct btrfs_root *root = NULL; struct btrfs_block_group_cache *cache = NULL; @@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps) struct btrfs_path *path = NULL; int ret; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate dummy root\n"); ret = PTR_ERR(root); @@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps) root->fs_info->free_space_root = root; root->fs_info->tree_root = root; - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, + nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE); + cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize); if (!cache) { test_msg("Couldn't allocate dummy block group cache\n"); ret = -ENOMEM; @@ -534,17 +537,18 @@ out: return ret; } -static int run_test_both_formats(test_func_t test_func) +static int run_test_both_formats(test_func_t test_func, + u32 sectorsize, u32 nodesize) { int ret; - ret = run_test(test_func, 0); + ret = run_test(test_func, 0, sectorsize, nodesize); if (ret) return ret; - return run_test(test_func, 1); + return run_test(test_func, 1, sectorsize, nodesize); } -int btrfs_test_free_space_tree(void) +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { test_func_t tests[] = { test_empty_block_group, @@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void) test_msg("Running free space tree tests\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { - int ret = run_test_both_formats(tests[i]); + int ret = run_test_both_formats(tests[i], sectorsize, + nodesize); if (ret) { - test_msg("%pf failed\n", tests[i]); + test_msg("%pf : sectorsize %u failed\n", + tests[i], sectorsize); return ret; } } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 863a6a3af1f8..29648c0a39f1 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../btrfs_inode.h" @@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root) * diagram of how the extents will look though this may not be possible we still * want to make sure everything acts normally (the last number is not inclusive) * - * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] - * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] + * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291] + * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split] * - * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] - * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] + * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ] + * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written] * - * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] - * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] + * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635] + * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1] * - * [81920-86016] - * [ regular ] + * [69635-73731][ 73731 - 86019 ][86019-90115] + * [ regular ][ hole but no extent][ regular ] */ -static void setup_file_extents(struct btrfs_root *root) +static void setup_file_extents(struct btrfs_root *root, u32 sectorsize) { int slot = 0; u64 disk_bytenr = SZ_1M; @@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root) insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, slot); slot++; - offset = 4096; + offset = sectorsize; /* Now another hole */ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, @@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root) offset += 4; /* Now for a regular extent */ - insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - disk_bytenr += 4096; - offset += 4095; + disk_bytenr += sectorsize; + offset += sectorsize - 1; /* * Now for 3 extents that were split from a hole punch so we test * offsets properly. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, - 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0, + BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now for a unwritten prealloc extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; + offset += sectorsize; /* * We want to jack up disk_bytenr a little more so the em stuff doesn't * merge our records. */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* * Now for a partially written prealloc extent, basically the same as * the hole punch example above. Ram_bytes never changes when you mark * extents written btw. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize, + disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, + slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now a normal compressed extent */ - insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; + offset += 2 * sectorsize; /* No merges */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* Now a split compressed extent */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, + disk_bytenr + sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; - disk_bytenr += 8192; + offset += 2 * sectorsize; + disk_bytenr += 2 * sectorsize; /* Now extents that have a hole but no hole extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 16384; - disk_bytenr += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += 4 * sectorsize; + disk_bytenr += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); } static unsigned long prealloc_only = 0; static unsigned long compressed_only = 0; static unsigned long vacancy_only = 0; -static noinline int test_btrfs_get_extent(void) +static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -264,7 +272,7 @@ static noinline int test_btrfs_get_extent(void) /* * We will just free a dummy node if it's ref count is 2 so we need an - * extra ref so our searches don't accidently release our page. + * extra ref so our searches don't accidentally release our page. */ extent_buffer_get(root->node); btrfs_set_header_nritems(root->node, 0); @@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void) * setup_file_extents, so if you change anything there you need to * update the comment and update the expected values below. */ - setup_file_extents(root); + setup_file_extents(root, sectorsize); em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { @@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected an inline, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4091) { + + if (em->start != offset || em->len != (sectorsize - 5)) { test_msg("Unexpected extent wanted start %llu len 1, got start " "%llu len %llu\n", offset, em->start, em->len); goto out; @@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4095) { + if (em->start != offset || em->len != sectorsize - 1) { test_msg("Unexpected extent wanted start %llu len 4095, got " "start %llu len %llu\n", offset, em->start, em->len); goto out; @@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void) disk_bytenr, em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void) * length of the actual hole, if this changes we'll have to change this * test. */ - if (em->start != offset || em->len != 12288) { - test_msg("Unexpected extent wanted start %llu len 12288, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 3 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 3 * sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -815,7 +838,7 @@ out: return ret; } -static int test_hole_first(void) +static int test_hole_first(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -832,7 +855,7 @@ static int test_hole_first(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -844,7 +867,7 @@ static int test_hole_first(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -861,9 +884,9 @@ static int test_hole_first(void) * btrfs_get_extent. */ insert_inode_item_key(root); - insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); + insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); + em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -872,9 +895,10 @@ static int test_hole_first(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != 0 || em->len != 4096) { - test_msg("Unexpected extent wanted start 0 len 4096, got start " - "%llu len %llu\n", em->start, em->len); + if (em->start != 0 || em->len != sectorsize) { + test_msg("Unexpected extent wanted start 0 len %u, " + "got start %llu len %llu\n", + sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -884,18 +908,19 @@ static int test_hole_first(void) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); + em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; } - if (em->block_start != 4096) { + if (em->block_start != sectorsize) { test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != 4096 || em->len != 4096) { - test_msg("Unexpected extent wanted start 4096 len 4096, got " - "start %llu len %llu\n", em->start, em->len); + if (em->start != sectorsize || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %u len %u, " + "got start %llu len %llu\n", + sectorsize, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -912,7 +937,7 @@ out: return ret; } -static int test_extent_accounting(void) +static int test_extent_accounting(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -924,7 +949,7 @@ static int test_extent_accounting(void) return ret; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -954,10 +979,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, - BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -969,10 +995,10 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_KERNEL); @@ -987,10 +1013,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + + sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -1004,16 +1031,17 @@ static int test_extent_accounting(void) } /* - * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] * * I'm artificially adding 2 to outstanding_extents because in the * buffered IO case we'd add things up as we go, but I don't feel like * doing that here, this isn't the interesting case we want to test. */ BTRFS_I(inode)->outstanding_extents += 2; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, - (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, - NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, + (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1025,10 +1053,13 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + /* + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] + */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1042,8 +1073,8 @@ static int test_extent_accounting(void) /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, NULL, GFP_KERNEL); @@ -1063,8 +1094,9 @@ static int test_extent_accounting(void) * might fail and I'd rather satisfy my paranoia at this point. */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1103,7 +1135,7 @@ out: return ret; } -int btrfs_test_inodes(void) +int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { int ret; @@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void) set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); test_msg("Running btrfs_get_extent tests\n"); - ret = test_btrfs_get_extent(); + ret = test_btrfs_get_extent(sectorsize, nodesize); if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - ret = test_hole_first(); + ret = test_hole_first(sectorsize, nodesize); if (ret) return ret; test_msg("Running outstanding_extents tests\n"); - return test_extent_accounting(); + return test_extent_accounting(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 8ea5d34bc5a2..57a12c0d680b 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../transaction.h" @@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, return ret; } -static int test_no_shared_qgroup(struct btrfs_root *root) +static int test_no_shared_qgroup(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -227,29 +229,30 @@ static int test_no_shared_qgroup(struct btrfs_root *root) btrfs_init_dummy_trans(&trans); test_msg("Qgroup basic add\n"); - ret = btrfs_create_qgroup(NULL, fs_info, 5); + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; } /* - * Since the test trans doesn't havee the complicated delayed refs, + * Since the test trans doesn't have the complicated delayed refs, * we can only call btrfs_qgroup_account_extent() directly to test * quota. */ - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } old_roots = NULL; new_roots = NULL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_item(root, 4096, 4096); + ret = remove_extent_item(root, nodesize, nodesize); if (ret) return -EINVAL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * right, also remove one of the roots and make sure the exclusive count is * adjusted properly. */ -static int test_multiple_refs(struct btrfs_root *root) +static int test_multiple_refs(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root) test_msg("Qgroup multiple refs test\n"); - /* We have 5 created already from the previous test */ - ret = btrfs_create_qgroup(NULL, fs_info, 256); + /* + * We have BTRFS_FS_TREE_OBJECTID created already from the + * previous test. + */ + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = add_tree_ref(root, 4096, 4096, 0, 256); + ret = add_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_ref(root, 4096, 4096, 0, 256); + ret = remove_extent_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -435,13 +451,13 @@ static int test_multiple_refs(struct btrfs_root *root) return 0; } -int btrfs_test_qgroups(void) +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; struct btrfs_root *tmp_root; int ret = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); return PTR_ERR(root); @@ -468,7 +484,8 @@ int btrfs_test_qgroups(void) * Can't use bytenr 0, some things freak out * *cough*backref walking code*cough* */ - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, nodesize, + nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -476,16 +493,16 @@ int btrfs_test_qgroups(void) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 5; + tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID; root->fs_info->fs_root = tmp_root; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { @@ -493,14 +510,14 @@ int btrfs_test_qgroups(void) goto out; } - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 256; + tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { test_msg("Couldn't insert fs root %d\n", ret); @@ -508,10 +525,10 @@ int btrfs_test_qgroups(void) } test_msg("Running qgroup tests\n"); - ret = test_no_shared_qgroup(root); + ret = test_no_shared_qgroup(root, sectorsize, nodesize); if (ret) goto out; - ret = test_multiple_refs(root); + ret = test_multiple_refs(root, sectorsize, nodesize); out: btrfs_free_dummy_root(root); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5b0b758a3f79..948aa186b353 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; + u64 transid = trans->transid; unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { - btrfs_async_run_delayed_refs(root, cur, + btrfs_async_run_delayed_refs(root, cur, transid, must_run_delayed_refs == 1); } return err; @@ -944,7 +945,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, err = convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, - mark, &cached_state, GFP_NOFS); + mark, &cached_state); /* * convert_extent_bit can return -ENOMEM, which is most of the * time a temporary error. So when it happens, ignore the error @@ -1311,11 +1312,6 @@ int btrfs_defrag_root(struct btrfs_root *root) return ret; } -/* Bisesctability fixup, remove in 4.8 */ -#ifndef btrfs_std_error -#define btrfs_std_error btrfs_handle_fs_error -#endif - /* * Do all special snapshot related qgroup dirty hack. * @@ -1385,7 +1381,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, switch_commit_roots(trans->transaction, fs_info); ret = btrfs_write_and_wait_transaction(trans, src); if (ret) - btrfs_std_error(fs_info, ret, + btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 72be51f7ca2f..c5abee4f01ad 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to @@ -144,7 +144,7 @@ struct btrfs_pending_snapshot { /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; u64 qgroup_reserved; - /* extra metadata reseration for relocation */ + /* extra metadata reservation for relocation */ int error; bool readonly; struct list_head list; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8aaca5c6af94..c05f69a8ec42 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2330,7 +2330,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, break; /* for regular files, make sure corresponding - * orhpan item exist. extents past the new EOF + * orphan item exist. extents past the new EOF * will be truncated later by orphan cleanup. */ if (S_ISREG(mode)) { @@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); if (*level == 1) { ret = wc->process_func(root, next, wc, ptr_gen); @@ -3001,7 +3001,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans, break; clear_extent_bits(&log->dirty_log_pages, start, end, - EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); + EXTENT_DIRTY | EXTENT_NEW); } /* @@ -4914,7 +4914,7 @@ out_unlock: * the actual unlink operation, so if we do this check before a concurrent task * sets last_unlink_trans it means we've logged a consistent version/state of * all the inode items, otherwise we are not sure and must do a transaction - * commit (the concurrent task migth have only updated last_unlink_trans before + * commit (the concurrent task might have only updated last_unlink_trans before * we logged the inode or it might have also done the unlink). */ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, @@ -4973,7 +4973,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, while (1) { /* * If we are logging a directory then we start with our inode, - * not our parents inode, so we need to skipp setting the + * not our parent's inode, so we need to skip setting the * logged_trans so that further down in the log code we don't * think this inode has already been logged. */ @@ -5357,7 +5357,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, log_dentries = true; /* - * On unlink we must make sure all our current and old parent directores + * On unlink we must make sure all our current and old parent directory * inodes are fully logged. This is to prevent leaving dangling * directory index entries in directories that were our parents but are * not anymore. Not doing this results in old parent directory being diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 91feb2bdefee..b1434bb57e36 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -28,7 +28,7 @@ * } * ulist_free(ulist); * - * This assumes the graph nodes are adressable by u64. This stems from the + * This assumes the graph nodes are addressable by u64. This stems from the * usage for tree enumeration in btrfs, where the logical addresses are * 64 bit. * diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2b88127bba5b..589f128173b1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2190,7 +2190,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) } /* - * strore the expected generation for seed devices in device items. + * Store the expected generation for seed devices in device items. */ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 dev_extent_len = 0; u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; int i, ret = 0; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; /* Just in case */ root = root->fs_info->chunk_root; @@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, check_system_chunk(trans, extent_root, map->type); unlock_chunks(root->fs_info->chunk_root); + /* + * Take the device list mutex to prevent races with the final phase of + * a device replace operation that replaces the device object associated + * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()). + */ + mutex_lock(&fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; ret = btrfs_free_dev_extent(trans, device, map->stripes[i].physical, &dev_extent_len); if (ret) { + mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, root, ret); goto out; } @@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, if (map->stripes[i].dev) { ret = btrfs_update_device(trans, map->stripes[i].dev); if (ret) { + mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, root, ret); goto out; } } } + mutex_unlock(&fs_devices->device_list_mutex); + ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -3387,7 +3398,7 @@ static int should_balance_chunk(struct btrfs_root *root, } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) { /* * Same logic as the 'limit' filter; the minimum cannot be - * determined here because we do not have the global informatoin + * determined here because we do not have the global information * about the count of all chunks that satisfy the filters. */ if (bargs->limit_max == 0) @@ -4230,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(uuid_root)) { ret = PTR_ERR(uuid_root); btrfs_abort_transaction(trans, tree_root, ret); + btrfs_end_transaction(trans, tree_root); return ret; } @@ -4682,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_RAID5) { raid_stripe_len = find_raid56_stripe_len(ndevs - 1, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 1; } if (type & BTRFS_BLOCK_GROUP_RAID6) { raid_stripe_len = find_raid56_stripe_len(ndevs - 2, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 2; } @@ -5762,20 +5774,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } if (found) { - if (physical_of_found + map->stripe_len <= - dev_replace->cursor_left) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; + struct btrfs_bio_stripe *tgtdev_stripe = + bbio->stripes + num_stripes; - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->length = - bbio->stripes[index_srcdev].length; - tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; + tgtdev_stripe->physical = physical_of_found; + tgtdev_stripe->length = + bbio->stripes[index_srcdev].length; + tgtdev_stripe->dev = dev_replace->tgtdev; + bbio->tgtdev_map[index_srcdev] = num_stripes; - tgtdev_indexes++; - num_stripes++; - } + tgtdev_indexes++; + num_stripes++; } } @@ -6076,7 +6085,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) { atomic_inc(&bbio->error); if (atomic_dec_and_test(&bbio->stripes_pending)) { - /* Shoud be the original bio. */ + /* Should be the original bio. */ WARN_ON(bio != bbio->orig_bio); btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; @@ -6250,27 +6259,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk) +/* Return -EIO if any error, otherwise return 0. */ +static int btrfs_check_chunk_valid(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) { - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - struct map_lookup *map; - struct extent_map *em; - u64 logical; u64 length; u64 stripe_len; - u64 devid; - u8 uuid[BTRFS_UUID_SIZE]; - int num_stripes; - int ret; - int i; + u16 num_stripes; + u16 sub_stripes; + u64 type; - logical = key->offset; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - /* Validation check */ + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + if (!num_stripes) { btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", num_stripes); @@ -6281,6 +6286,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, "invalid chunk logical %llu", logical); return -EIO; } + if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) { + btrfs_err(root->fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } if (!length || !IS_ALIGNED(length, root->sectorsize)) { btrfs_err(root->fs_info, "invalid chunk length %llu", length); @@ -6292,13 +6302,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, return -EIO; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)) { + type) { btrfs_err(root->fs_info, "unrecognized chunk type: %llu", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); return -EIO; } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + btrfs_err(root->fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct extent_map *em; + u64 logical; + u64 length; + u64 stripe_len; + u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; + int num_stripes; + int ret; + int i; + + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + ret = btrfs_check_chunk_valid(root, leaf, chunk, logical); + if (ret) + return ret; read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); @@ -6546,6 +6597,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 array_size; u32 len = 0; u32 cur_offset; + u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); @@ -6555,12 +6607,12 @@ int btrfs_read_sys_array(struct btrfs_root *root) * overallocate but we can keep it as-is, only the first page is used. */ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); - if (!sb) - return -ENOMEM; + if (IS_ERR(sb)) + return PTR_ERR(sb); set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* - * The sb extent buffer is artifical and just used to read the system array. + * The sb extent buffer is artificial and just used to read the system array. * set_extent_buffer_uptodate() call does not properly mark all it's * pages up-to-date when the page is larger: extent does not cover the * whole page and consequently check_page_uptodate does not find all @@ -6612,6 +6664,15 @@ int btrfs_read_sys_array(struct btrfs_root *root) break; } + type = btrfs_chunk_type(sb, chunk); + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + btrfs_err(root->fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; @@ -6630,13 +6691,15 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_array_offset += len; cur_offset += len; } - free_extent_buffer(sb); + clear_extent_buffer_uptodate(sb); + free_extent_buffer_stale(sb); return ret; out_short_read: printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", len, cur_offset); - free_extent_buffer(sb); + clear_extent_buffer_uptodate(sb); + free_extent_buffer_stale(sb); return -EIO; } @@ -6648,6 +6711,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) struct btrfs_key found_key; int ret; int slot; + u64 total_dev = 0; root = root->fs_info->chunk_root; @@ -6689,6 +6753,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) ret = read_one_dev(root, leaf, dev_item); if (ret) goto error; + total_dev++; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -6698,6 +6763,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) } path->slots[0]++; } + + /* + * After loading chunk tree, we've got all device information, + * do another round of validation checks. + */ + if (total_dev != root->fs_info->fs_devices->total_devices) { + btrfs_err(root->fs_info, + "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_super_num_devices(root->fs_info->super_copy), + total_dev); + ret = -EINVAL; + goto error; + } + if (btrfs_super_total_bytes(root->fs_info->super_copy) < + root->fs_info->fs_devices->total_rw_bytes) { + btrfs_err(root->fs_info, + "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu", + btrfs_super_total_bytes(root->fs_info->super_copy), + root->fs_info->fs_devices->total_rw_bytes); + ret = -EINVAL; + goto error; + } ret = 0; error: unlock_chunks(root); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3bfb252206c7..d1a177a3dbe8 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -380,23 +380,21 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler, } static int btrfs_xattr_handler_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, - int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); - name = xattr_full_name(handler, name); return __btrfs_setxattr(NULL, inode, name, buffer, size, flags); } static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { name = xattr_full_name(handler, name); - return btrfs_set_prop(d_inode(dentry), name, value, size, flags); + return btrfs_set_prop(inode, name, value, size, flags); } static const struct xattr_handler btrfs_security_xattr_handler = { diff --git a/fs/buffer.c b/fs/buffer.c index 754813a6962b..6c15012a75d9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1687,7 +1687,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this * causes the writes to be flagged as synchronous writes. */ -static int __block_write_full_page(struct inode *inode, struct page *page, +int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler) { @@ -1848,6 +1848,7 @@ recover: unlock_page(page); goto done; } +EXPORT_SYMBOL(__block_write_full_page); /* * If a page has any new buffers, zero them out here, and mark them uptodate diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 861d611b8c05..ce5f345d70f5 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -380,7 +380,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) * check if the backing cache is updated to FS-Cache * - called by FS-Cache when evaluates if need to invalidate the cache */ -static bool cachefiles_check_consistency(struct fscache_operation *op) +static int cachefiles_check_consistency(struct fscache_operation *op) { struct cachefiles_object *object; struct cachefiles_cache *cache; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index eeb71e5de27a..26a9d10d75e9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -276,8 +276,10 @@ static void finish_read(struct ceph_osd_request *req) for (i = 0; i < num_pages; i++) { struct page *page = osd_data->pages[i]; - if (rc < 0 && rc != -ENOENT) + if (rc < 0 && rc != -ENOENT) { + ceph_fscache_readpage_cancel(inode, page); goto unlock; + } if (bytes < (int)PAGE_SIZE) { /* zero (remainder of) page */ int s = bytes < 0 ? 0 : bytes; @@ -535,8 +537,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_readpage_to_fscache(inode, page); - set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), &ci->i_layout, snapc, diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index c052b5bf219b..238c55b01723 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -25,6 +25,7 @@ #include "cache.h" struct ceph_aux_inode { + u64 version; struct timespec mtime; loff_t size; }; @@ -69,15 +70,8 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc) fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, fsc, true); - - if (fsc->fscache == NULL) { + if (!fsc->fscache) pr_err("Unable to resgister fsid: %p fscache cookie", fsc); - return 0; - } - - fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1); - if (fsc->revalidate_wq == NULL) - return -ENOMEM; return 0; } @@ -105,6 +99,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data, const struct inode* inode = &ci->vfs_inode; memset(&aux, 0, sizeof(aux)); + aux.version = ci->i_version; aux.mtime = inode->i_mtime; aux.size = i_size_read(inode); @@ -131,6 +126,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( return FSCACHE_CHECKAUX_OBSOLETE; memset(&aux, 0, sizeof(aux)); + aux.version = ci->i_version; aux.mtime = inode->i_mtime; aux.size = i_size_read(inode); @@ -181,32 +177,26 @@ static const struct fscache_cookie_def ceph_fscache_inode_object_def = { .now_uncached = ceph_fscache_inode_now_uncached, }; -void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, - struct ceph_inode_info* ci) +void ceph_fscache_register_inode_cookie(struct inode *inode) { - struct inode* inode = &ci->vfs_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); /* No caching for filesystem */ if (fsc->fscache == NULL) return; /* Only cache for regular files that are read only */ - if ((ci->vfs_inode.i_mode & S_IFREG) == 0) + if (!S_ISREG(inode->i_mode)) return; - /* Avoid multiple racing open requests */ - inode_lock(inode); - - if (ci->fscache) - goto done; - - ci->fscache = fscache_acquire_cookie(fsc->fscache, - &ceph_fscache_inode_object_def, - ci, true); - fscache_check_consistency(ci->fscache); -done: + inode_lock_nested(inode, I_MUTEX_CHILD); + if (!ci->fscache) { + ci->fscache = fscache_acquire_cookie(fsc->fscache, + &ceph_fscache_inode_object_def, + ci, false); + } inode_unlock(inode); - } void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) @@ -222,6 +212,34 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) fscache_relinquish_cookie(cookie, 0); } +static bool ceph_fscache_can_enable(void *data) +{ + struct inode *inode = data; + return !inode_is_open_for_write(inode); +} + +void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + if (!fscache_cookie_valid(ci->fscache)) + return; + + if (inode_is_open_for_write(inode)) { + dout("fscache_file_set_cookie %p %p disabling cache\n", + inode, filp); + fscache_disable_cookie(ci->fscache, false); + fscache_uncache_all_inode_pages(ci->fscache, inode); + } else { + fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable, + inode); + if (fscache_cookie_enabled(ci->fscache)) { + dout("fscache_file_set_cookie %p %p enabing cache\n", + inode, filp); + } + } +} + static void ceph_vfs_readpage_complete(struct page *page, void *data, int error) { if (!error) @@ -238,8 +256,7 @@ static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int static inline bool cache_valid(struct ceph_inode_info *ci) { - return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) && - (ci->i_fscache_gen == ci->i_rdcache_gen)); + return ci->i_fscache_gen == ci->i_rdcache_gen; } @@ -332,69 +349,27 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc) { - if (fsc->revalidate_wq) - destroy_workqueue(fsc->revalidate_wq); - fscache_relinquish_cookie(fsc->fscache, 0); fsc->fscache = NULL; } -static void ceph_revalidate_work(struct work_struct *work) -{ - int issued; - u32 orig_gen; - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_revalidate_work); - struct inode *inode = &ci->vfs_inode; - - spin_lock(&ci->i_ceph_lock); - issued = __ceph_caps_issued(ci, NULL); - orig_gen = ci->i_rdcache_gen; - spin_unlock(&ci->i_ceph_lock); - - if (!(issued & CEPH_CAP_FILE_CACHE)) { - dout("revalidate_work lost cache before validation %p\n", - inode); - goto out; - } - - if (!fscache_check_consistency(ci->fscache)) - fscache_invalidate(ci->fscache); - - spin_lock(&ci->i_ceph_lock); - /* Update the new valid generation (backwards sanity check too) */ - if (orig_gen > ci->i_fscache_gen) { - ci->i_fscache_gen = orig_gen; - } - spin_unlock(&ci->i_ceph_lock); - -out: - iput(&ci->vfs_inode); -} - -void ceph_queue_revalidate(struct inode *inode) +/* + * caller should hold CEPH_CAP_FILE_{RD,CACHE} + */ +void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_inode_info *ci = ceph_inode(inode); - - if (fsc->revalidate_wq == NULL || ci->fscache == NULL) + if (cache_valid(ci)) return; - ihold(inode); - - if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq, - &ci->i_revalidate_work)) { - dout("ceph_queue_revalidate %p\n", inode); - } else { - dout("ceph_queue_revalidate %p failed\n)", inode); - iput(inode); + /* resue i_truncate_mutex. There should be no pending + * truncate while the caller holds CEPH_CAP_FILE_RD */ + mutex_lock(&ci->i_truncate_mutex); + if (!cache_valid(ci)) { + if (fscache_check_consistency(ci->fscache)) + fscache_invalidate(ci->fscache); + spin_lock(&ci->i_ceph_lock); + ci->i_fscache_gen = ci->i_rdcache_gen; + spin_unlock(&ci->i_ceph_lock); } -} - -void ceph_fscache_inode_init(struct ceph_inode_info *ci) -{ - ci->fscache = NULL; - /* The first load is verifed cookie open time */ - ci->i_fscache_gen = 1; - INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work); + mutex_unlock(&ci->i_truncate_mutex); } diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index 5ac591bd012b..7e72c7594f0c 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -34,10 +34,10 @@ void ceph_fscache_unregister(void); int ceph_fscache_register_fs(struct ceph_fs_client* fsc); void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc); -void ceph_fscache_inode_init(struct ceph_inode_info *ci); -void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, - struct ceph_inode_info* ci); +void ceph_fscache_register_inode_cookie(struct inode *inode); void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci); +void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp); +void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci); int ceph_readpage_from_fscache(struct inode *inode, struct page *page); int ceph_readpages_from_fscache(struct inode *inode, @@ -46,12 +46,11 @@ int ceph_readpages_from_fscache(struct inode *inode, unsigned *nr_pages); void ceph_readpage_to_fscache(struct inode *inode, struct page *page); void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); -void ceph_queue_revalidate(struct inode *inode); -static inline void ceph_fscache_update_objectsize(struct inode *inode) +static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = ceph_inode(inode); - fscache_attr_changed(ci->fscache); + ci->fscache = NULL; + ci->i_fscache_gen = 0; } static inline void ceph_fscache_invalidate(struct inode *inode) @@ -88,6 +87,11 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode, return fscache_readpages_cancel(ci->fscache, pages); } +static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci) +{ + ci->i_fscache_gen = ci->i_rdcache_gen - 1; +} + #else static inline int ceph_fscache_register(void) @@ -112,8 +116,20 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci) { } -static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc, - struct ceph_inode_info* ci) +static inline void ceph_fscache_register_inode_cookie(struct inode *inode) +{ +} + +static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) +{ +} + +static inline void ceph_fscache_file_set_cookie(struct inode *inode, + struct file *filp) +{ +} + +static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci) { } @@ -141,10 +157,6 @@ static inline void ceph_readpage_to_fscache(struct inode *inode, { } -static inline void ceph_fscache_update_objectsize(struct inode *inode) -{ -} - static inline void ceph_fscache_invalidate(struct inode *inode) { } @@ -154,10 +166,6 @@ static inline void ceph_invalidate_fscache_page(struct inode *inode, { } -static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) -{ -} - static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp) { return 1; @@ -173,7 +181,7 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode, { } -static inline void ceph_queue_revalidate(struct inode *inode) +static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci) { } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c17b5d76d75e..6f60d0a3d0f9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2393,6 +2393,9 @@ again: snap_rwsem_locked = true; } *got = need | (have & want); + if ((need & CEPH_CAP_FILE_RD) && + !(*got & CEPH_CAP_FILE_CACHE)) + ceph_disable_fscache_readpage(ci); __take_cap_refs(ci, *got, true); ret = 1; } @@ -2554,6 +2557,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, break; } + if ((_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE)) + ceph_fscache_revalidate_cookie(ci); + *got = _got; return 0; } @@ -2795,7 +2801,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool writeback = false; bool queue_trunc = false; bool queue_invalidate = false; - bool queue_revalidate = false; bool deleted_inode = false; bool fill_inline = false; @@ -2837,8 +2842,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, ci->i_rdcache_revoking = ci->i_rdcache_gen; } } - - ceph_fscache_invalidate(inode); } /* side effects now are allowed */ @@ -2880,11 +2883,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } } - /* Do we need to revalidate our fscache cookie. Don't bother on the - * first cache cap as we already validate at cookie creation time. */ - if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) - queue_revalidate = true; - if (newcaps & CEPH_CAP_ANY_RD) { /* ctime/mtime/atime? */ ceph_decode_timespec(&mtime, &grant->mtime); @@ -2993,11 +2991,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, if (fill_inline) ceph_fill_inline_data(inode, NULL, inline_data, inline_len); - if (queue_trunc) { + if (queue_trunc) ceph_queue_vmtruncate(inode); - ceph_queue_revalidate(inode); - } else if (queue_revalidate) - ceph_queue_revalidate(inode); if (writeback) /* @@ -3199,10 +3194,8 @@ static void handle_cap_trunc(struct inode *inode, truncate_seq, truncate_size, size); spin_unlock(&ci->i_ceph_lock); - if (queue_trunc) { + if (queue_trunc) ceph_queue_vmtruncate(inode); - ceph_fscache_invalidate(inode); - } } /* diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 6e72c98162d5..1780218a48f0 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) } dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb, return ERR_PTR(-ENOENT); dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, dout("fh_to_parent %llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); - if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) + if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); return dentry; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a888df6f2d71..0daaf7ceedc5 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -137,23 +137,11 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { struct ceph_file_info *cf; int ret = 0; - struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; switch (inode->i_mode & S_IFMT) { case S_IFREG: - /* First file open request creates the cookie, we want to keep - * this cookie around for the filetime of the inode as not to - * have to worry about fscache register / revoke / operation - * races. - * - * Also, if we know the operation is going to invalidate data - * (non readonly) just nuke the cache right away. - */ - ceph_fscache_register_inode_cookie(mdsc->fsc, ci); - if ((fmode & CEPH_FILE_MODE_WR)) - ceph_fscache_invalidate(inode); + ceph_fscache_register_inode_cookie(inode); + ceph_fscache_file_set_cookie(inode, file); case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); @@ -406,7 +394,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { dn = ceph_finish_lookup(req, dentry, err); if (IS_ERR(dn)) err = PTR_ERR(dn); @@ -1349,7 +1337,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) } retry_snap: - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1407,7 +1395,6 @@ retry_snap: iov_iter_advance(from, written); ceph_put_snap_context(snapc); } else { - loff_t old_size = i_size_read(inode); /* * No need to acquire the i_truncate_mutex. Because * the MDS revokes Fwb caps before sending truncate @@ -1418,8 +1405,6 @@ retry_snap: written = generic_perform_write(file, from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; - if (i_size_read(inode) > old_size) - ceph_fscache_update_objectsize(inode); inode_unlock(inode); } @@ -1440,7 +1425,7 @@ retry_snap: ceph_put_cap_refs(ci, got); if (written >= 0) { - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); @@ -1672,8 +1657,8 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) && - !(mode & FALLOC_FL_PUNCH_HOLE)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && + !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0130a8592191..0168b49fb6ad 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -103,7 +103,6 @@ struct ceph_fs_client { #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; - struct workqueue_struct *revalidate_wq; #endif }; @@ -360,8 +359,7 @@ struct ceph_inode_info { #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; - u32 i_fscache_gen; /* sequence, for delayed fscache validate */ - struct work_struct i_revalidate_work; + u32 i_fscache_gen; #endif struct inode vfs_inode; /* at end */ }; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index dacc1bd85629..4870b29df224 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1056,12 +1056,13 @@ static int ceph_get_xattr_handler(const struct xattr_handler *handler, } static int ceph_set_xattr_handler(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; - return __ceph_setxattr(d_inode(dentry), name, value, size, flags); + return __ceph_setxattr(inode, name, value, size, flags); } const struct xattr_handler ceph_other_xattr_handler = { diff --git a/fs/char_dev.c b/fs/char_dev.c index 687471dc04a0..6edd825231c5 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -92,7 +92,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, } if (i < CHRDEV_MAJOR_DYN_END) - pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range", + pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range\n", name, i); if (i == 0) { diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 5a53ac6b1e02..02b071bf3732 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_SLASH: *target = '\\'; break; + case SFM_SPACE: + *target = ' '; + break; + case SFM_PERIOD: + *target = '.'; + break; default: return false; } @@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char) return dest_char; } -static __le16 convert_to_sfm_char(char src_char) +static __le16 convert_to_sfm_char(char src_char, bool end_of_string) { __le16 dest_char; @@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char) case '|': dest_char = cpu_to_le16(SFM_PIPE); break; + case '.': + if (end_of_string) + dest_char = cpu_to_le16(SFM_PERIOD); + else + dest_char = 0; + break; + case ' ': + if (end_of_string) + dest_char = cpu_to_le16(SFM_SPACE); + else + dest_char = 0; + break; default: dest_char = 0; } @@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, /* see if we must remap this char */ if (map_chars == SFU_MAP_UNI_RSVD) dst_char = convert_to_sfu_char(src_char); - else if (map_chars == SFM_MAP_UNI_RSVD) - dst_char = convert_to_sfm_char(src_char); - else + else if (map_chars == SFM_MAP_UNI_RSVD) { + bool end_of_string; + + if (i == srclen - 1) + end_of_string = true; + else + end_of_string = false; + + dst_char = convert_to_sfm_char(src_char, end_of_string); + } else dst_char = 0; /* * FIXME: We can not handle remapping backslash (UNI_SLASH) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index bdc52cb9a676..479bc0a941f3 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,6 +64,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) +#define SFM_PERIOD ((__u16) 0xF028) +#define SFM_SPACE ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5d8b7edf8a8f..5d841f39c4b7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +__u32 cifs_lock_secret; /* * Bumps refcount for cifs super block. @@ -1266,6 +1267,8 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); + get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret)); + if (cifs_max_pending < 2) { cifs_max_pending = 2; cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bba106cdc43c..8f1d8c1e72be 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 66736f57b5ab..7d2b15c06090 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work) * server->ops->need_neg() == true. Also, no need to ping if * we got a response recently. */ - if (!server->ops->need_neg || server->ops->need_neg(server) || + + if (server->tcpStatus == CifsNeedReconnect || + server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + echo_interval - HZ)) goto requeue_echo; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c3eb998a99bd..fb0903fffc22 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -445,7 +445,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * Check for hashed negative dentry. We have already revalidated * the dentry and it is fine. No need to perform another lookup. */ - if (!d_unhashed(direntry)) + if (!d_in_lookup(direntry)) return -ENOENT; res = cifs_lookup(inode, direntry, 0); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9793ae0bcaa2..d4890b6dc22d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } +static __u32 +hash_lockowner(fl_owner_t owner) +{ + return cifs_lock_secret ^ hash32_ptr((const void *)owner); +} + struct lock_to_push { struct list_head llist; __u64 offset; @@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) else type = CIFS_WRLCK; lck = list_entry(el, struct lock_to_push, llist); - lck->pid = flock->fl_pid; + lck->pid = hash_lockowner(flock->fl_owner); lck->netfid = cfile->fid.netfid; lck->length = length; lck->type = type; @@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_RDLCK; else posix_lock_type = CIFS_WRLCK; - rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, + rc = CIFSSMBPosixLock(xid, tcon, netfid, + hash_lockowner(flock->fl_owner), flock->fl_start, length, flock, posix_lock_type, wait_flag); return rc; @@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_UNLCK; rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, - current->tgid, flock->fl_start, length, + hash_lockowner(flock->fl_owner), + flock->fl_start, length, NULL, posix_lock_type, wait_flag); goto out; } diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 848249fa120f..3079b38f0afb 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE { int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses); -int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index af0ec2d5ad0e..538d9b55699a 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, sec_blob->DomainName.MaximumLength = 0; } -/* We do not malloc the blob, it is passed in pbuffer, because its - maximum possible size is fixed and small, making this approach cleaner. - This function returns the length of the data in the blob */ -int build_ntlmssp_auth_blob(unsigned char *pbuffer, +static int size_of_ntlmssp_blob(struct cifs_ses *ses) +{ + int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len + - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2; + + if (ses->domainName) + sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + else + sz += 2; + + if (ses->user_name) + sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); + else + sz += 2; + + return sz; +} + +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp) { int rc; - AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; + AUTHENTICATE_MESSAGE *sec_blob; __u32 flags; unsigned char *tmp; + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } + *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } - tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); + tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->LmChallengeResponse.BufferOffset = @@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.BufferOffset = + cpu_to_le32(tmp - *pbuffer); if (ses->user_name != NULL) { - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); - goto setup_ntlmv2_ret; - } memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE, ses->auth_key.len - CIFS_SESS_KEY_SIZE); tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE; @@ -423,23 +443,23 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } if (ses->domainName == NULL) { - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = 0; sec_blob->DomainName.MaximumLength = 0; tmp += 2; } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, - CIFS_MAX_USERNAME_LEN, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); tmp += len; } if (ses->user_name == NULL) { - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = 0; sec_blob->UserName.MaximumLength = 0; tmp += 2; @@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); tmp += len; } - sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->WorkstationName.Length = 0; sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; @@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) && !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); sec_blob->SessionKey.MaximumLength = cpu_to_le16(CIFS_CPHTXT_SIZE); tmp += CIFS_CPHTXT_SIZE; } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = 0; sec_blob->SessionKey.MaximumLength = 0; } + *buflen = tmp - *pbuffer; setup_ntlmv2_ret: - *buflen = tmp - pbuffer; return rc; } @@ -690,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data) rc = calc_lanman_hash(ses->password, ses->server->cryptkey, ses->server->sec_mode & SECMODE_PW_ENCRYPT ? true : false, lnm_session_key); + if (rc) + goto out; memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); bcc_ptr += CIFS_AUTH_RESP_SIZE; @@ -1266,7 +1288,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; __u16 bytes_remaining; char *bcc_ptr; - char *ntlmsspblob = NULL; + unsigned char *ntlmsspblob = NULL; u16 blob_len; cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); @@ -1279,19 +1301,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) /* Build security blob before we assemble the request */ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)pSMB; - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto out; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, + rc = build_ntlmssp_auth_blob(&ntlmsspblob, &blob_len, ses, sess_data->nls_cp); if (rc) goto out_free_ntlmsspblob; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8f38e33d365b..29e06db5f187 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, u16 blob_length = 0; struct key *spnego_key = NULL; char *security_blob = NULL; - char *ntlmssp_blob = NULL; + unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ cifs_dbg(FYI, "Session Setup\n"); @@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; - ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, - GFP_KERNEL); - if (ntlmssp_blob == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, + rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, nls_cp); if (rc) { cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", @@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + if (server->tcpStatus == CifsNeedNegotiate) { + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + /* add check for persistent handle reconnect */ + if (tcon && tcon->need_reconnect) { + spin_unlock(&cifs_tcp_ses_lock); + rc = smb2_reconnect(SMB2_ECHO, tcon); + spin_lock(&cifs_tcp_ses_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + } + + /* if no session, renegotiate failed above */ + if (server->tcpStatus == CifsNeedNegotiate) + return -EIO; + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index c8b77aa24a1d..5e23f64c0804 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -39,8 +39,9 @@ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT }; static int cifs_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { int rc = -EOPNOTSUPP; unsigned int xid; @@ -99,12 +100,12 @@ static int cifs_xattr_set(const struct xattr_handler *handler, if (value && pTcon->ses->server->ops->set_acl) rc = pTcon->ses->server->ops->set_acl(pacl, - size, d_inode(dentry), + size, inode, full_path, CIFS_ACL_DACL); else rc = -EOPNOTSUPP; if (rc == 0) /* force revalidate of the inode */ - CIFS_I(d_inode(dentry))->time = 0; + CIFS_I(inode)->time = 0; kfree(pacl); } #endif /* CONFIG_CIFS_ACL */ diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 33b7ee34eda5..bbc1252a59f5 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, len = simple_write_to_buffer(buffer->bin_buffer, buffer->bin_buffer_size, ppos, buf, count); - if (len > 0) - *ppos += len; out: mutex_unlock(&buffer->mutex); return len; diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; @@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, dax.addr += first; size = map_len - first; } - max = min(pos + size, end); + /* + * pos + size is one past the last offset for IO, + * so pos + size can overflow loff_t at extreme offsets. + * Cast to u64 to catch this and get the true minimum. + */ + max = min_t(u64, pos + size, end); } if (iov_iter_rw(iter) == WRITE) { diff --git a/fs/dcache.c b/fs/dcache.c index c622872c12c5..d6847d7b123d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. + */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1203,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1636,7 +1672,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock @@ -1670,8 +1716,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) struct qstr q; q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); + q.hash_len = hashlen_string(name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); @@ -2359,7 +2404,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2459,7 +2503,6 @@ retry: rcu_read_unlock(); goto retry; } - rcu_read_unlock(); /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), @@ -2472,8 +2515,6 @@ retry: continue; if (dentry->d_parent != parent) continue; - if (d_unhashed(dentry)) - continue; if (parent->d_flags & DCACHE_OP_COMPARE) { int tlen = dentry->d_name.len; const char *tname = dentry->d_name.name; @@ -2485,9 +2526,18 @@ retry: if (dentry_cmp(dentry, str, len)) continue; } - dget(dentry); hlist_bl_unlock(b); - /* somebody is doing lookup for it right now; wait for it */ + /* now we can try to grab a reference */ + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + + rcu_read_unlock(); + /* + * somebody is likely to be still doing lookup for it; + * wait for them to finish + */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* @@ -2518,6 +2568,7 @@ retry: dput(new); return dentry; } + rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; @@ -2844,6 +2895,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 9c1c9a01b7e5..592059f88e04 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp) r = real_fops->open(inode, filp); out: - fops_put(real_fops); debugfs_use_file_finish(srcu_idx); return r; } @@ -262,8 +261,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp) if (real_fops->open) { r = real_fops->open(inode, filp); - - if (filp->f_op != proxy_fops) { + if (r) { + replace_fops(filp, d_inode(dentry)->i_fop); + goto free_proxy; + } else if (filp->f_op != proxy_fops) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0b2954d7172d..37c134a132c7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = { static DEFINE_MUTEX(allocated_ptys_lock); -static struct vfsmount *devpts_mnt; - struct pts_mount_opts { int setuid; int setgid; @@ -104,7 +102,7 @@ struct pts_mount_opts { kgid_t gid; umode_t mode; umode_t ptmxmode; - int newinstance; + int reserve; int max; }; @@ -117,11 +115,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, {Opt_max, "max=%d"}, -#endif {Opt_err, NULL} }; @@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -static inline struct super_block *pts_sb_from_inode(struct inode *inode) +struct pts_fs_info *devpts_acquire(struct file *filp) { -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES - if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - return inode->i_sb; -#endif - if (!devpts_mnt) - return NULL; - return devpts_mnt->mnt_sb; + struct pts_fs_info *result; + struct path path; + struct super_block *sb; + int err; + + path = filp->f_path; + path_get(&path); + + /* Has the devpts filesystem already been found? */ + sb = path.mnt->mnt_sb; + if (sb->s_magic != DEVPTS_SUPER_MAGIC) { + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(&path); + if (err) { + result = ERR_PTR(err); + goto out; + } + + /* Is the path the root of a devpts filesystem? */ + result = ERR_PTR(-ENODEV); + sb = path.mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path.mnt->mnt_root != sb->s_root)) + goto out; + } + + /* + * pty code needs to hold extra references in case of last /dev/tty close + */ + atomic_inc(&sb->s_active); + result = DEVPTS_SB(sb); + +out: + path_put(&path); + return result; +} + +void devpts_release(struct pts_fs_info *fsi) +{ + deactivate_super(fsi->sb); } #define PARSE_MOUNT 0 @@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) /* * parse_mount_options(): * Set @opts to mount options specified in @data. If an option is not - * specified in @data, set it to its default value. The exception is - * 'newinstance' option which can only be set/cleared on a mount (i.e. - * cannot be changed during remount). + * specified in @data, set it to its default value. * * Note: @data may be NULL (in which case all options are set to default). */ @@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; opts->max = NR_UNIX98_PTY_MAX; - /* newinstance makes sense only on initial mount */ + /* Only allow instances mounted from the initial mount + * namespace to tap the reserve pool of ptys. + */ if (op == PARSE_MOUNT) - opts->newinstance = 0; + opts->reserve = + (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns); while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES case Opt_ptmxmode: if (match_octal(&args[0], &option)) return -EINVAL; opts->ptmxmode = option & S_IALLUGO; break; case Opt_newinstance: - /* newinstance makes sense only on initial mount */ - if (op == PARSE_MOUNT) - opts->newinstance = 1; break; case Opt_max: if (match_int(&args[0], &option) || @@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->max = option; break; -#endif default: pr_err("called with bogus options\n"); return -EINVAL; @@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return 0; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES static int mknod_ptmx(struct super_block *sb) { int mode; @@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } } -#else -static inline void update_ptmx_mode(struct pts_fs_info *fsi) -{ - return; -} -#endif static int devpts_remount(struct super_block *sb, int *flags, char *data) { @@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); if (opts->max < NR_UNIX98_PTY_MAX) seq_printf(seq, ",max=%d", opts->max); -#endif return 0; } @@ -410,40 +426,11 @@ fail: return -ENOMEM; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES -static int compare_init_pts_sb(struct super_block *s, void *p) -{ - if (devpts_mnt) - return devpts_mnt->mnt_sb == s; - return 0; -} - /* * devpts_mount() * - * If the '-o newinstance' mount option was specified, mount a new - * (private) instance of devpts. PTYs created in this instance are - * independent of the PTYs in other devpts instances. - * - * If the '-o newinstance' option was not specified, mount/remount the - * initial kernel mount of devpts. This type of mount gives the - * legacy, single-instance semantics. - * - * The 'newinstance' option is needed to support multiple namespace - * semantics in devpts while preserving backward compatibility of the - * current 'single-namespace' semantics. i.e all mounts of devpts - * without the 'newinstance' mount option should bind to the initial - * kernel mount, like mount_single(). - * - * Mounts with 'newinstance' option create a new, private namespace. - * - * NOTE: - * - * For single-mount semantics, devpts cannot use mount_single(), - * because mount_single()/sget() find and use the super-block from - * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and mount_single() would pick the newinstance - * super-block instead of the initial super-block. + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. */ static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, if (error) return ERR_PTR(error); - /* Require newinstance for all user namespace mounts to ensure - * the mount options are not changed. - */ - if ((current_user_ns() != &init_user_ns) && !opts.newinstance) - return ERR_PTR(-EINVAL); - - if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, flags, NULL); - else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, - NULL); - + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -491,18 +467,6 @@ out_undo_sget: return ERR_PTR(error); } -#else -/* - * This supports only the legacy single-instance semantics (no - * multiple-instance semantics) - */ -static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_single(fs_type, flags, data, devpts_fill_super); -} -#endif - static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); @@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = { .name = "devpts", .mount = devpts_mount, .kill_sb = devpts_kill_sb, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, -#endif }; /* @@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi) int index; int ida_ret; - if (!fsi) - return -ENODEV; - retry: if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; mutex_lock(&allocated_ptys_lock); - if (pty_count >= pty_limit - - (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + if (pty_count >= (pty_limit - + (fsi->mount_opts.reserve ? 0 : pty_reserve))) { mutex_unlock(&allocated_ptys_lock); return -ENOSPC; } @@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx) mutex_unlock(&allocated_ptys_lock); } -/* - * pty code needs to hold extra references in case of last /dev/tty close - */ -struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file) -{ - struct super_block *sb; - struct pts_fs_info *fsi; - - sb = pts_sb_from_inode(ptmx_inode); - if (!sb) - return NULL; - fsi = DEVPTS_SB(sb); - if (!fsi) - return NULL; - - atomic_inc(&sb->s_active); - return fsi; -} - -void devpts_put_ref(struct pts_fs_info *fsi) -{ - deactivate_super(fsi->sb); -} - /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master @@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi) struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { struct dentry *dentry; - struct super_block *sb; + struct super_block *sb = fsi->sb; struct inode *inode; struct dentry *root; struct pts_mount_opts *opts; char s[12]; - if (!fsi) - return ERR_PTR(-ENODEV); - - sb = fsi->sb; root = sb->s_root; opts = &fsi->mount_opts; @@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); - struct ctl_table_header *table; - if (!err) { - struct vfsmount *mnt; - - table = register_sysctl_table(pty_root_table); - mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); - unregister_filesystem(&devpts_fs_type); - unregister_sysctl_table(table); - } else { - devpts_mnt = mnt; - } + register_sysctl_table(pty_root_table); } return err; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 3bf3f20f8ecc..f3b4408be590 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -628,11 +628,11 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, map_bh->b_size = fs_count << i_blkbits; /* - * For writes inside i_size on a DIO_SKIP_HOLES filesystem we - * forbid block creations: only overwrites are permitted. - * We will return early to the caller once we see an - * unmapped buffer head returned, and the caller will fall - * back to buffered I/O. + * For writes that could fill holes inside i_size on a + * DIO_SKIP_HOLES filesystem we forbid block creations: only + * overwrites are permitted. We will return early to the caller + * once we see an unmapped buffer head returned, and the caller + * will fall back to buffered I/O. * * Otherwise the decision is left to the get_blocks method, * which may decide to handle it or also return an unmapped @@ -640,8 +640,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, */ create = dio->rw & WRITE; if (dio->flags & DIO_SKIP_HOLES) { - if (sdio->block_in_file < (i_size_read(dio->inode) >> - sdio->blkbits)) + if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> + i_blkbits)) create = 0; } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index ebd40f46ed4c..e5e29f8c920b 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -45,7 +45,7 @@ * ecryptfs_to_hex * @dst: Buffer to take hex character representation of contents of * src; must be at least of size (src_size * 2) - * @src: Buffer to be converted to a hex string respresentation + * @src: Buffer to be converted to a hex string representation * @src_size: number of bytes to convert */ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) @@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) * ecryptfs_from_hex * @dst: Buffer to take the bytes from src hex; must be at least of * size (src_size / 2) - * @src: Buffer to be converted from a hex string respresentation to raw value + * @src: Buffer to be converted from a hex string representation to raw value * @dst_size: size of dst buffer, or number of hex characters pairs to convert */ void ecryptfs_from_hex(char *dst, char *src, int dst_size) @@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem { }; /* Add support for additional ciphers by adding elements here. The - * cipher_code is whatever OpenPGP applicatoins use to identify the + * cipher_code is whatever OpenPGP applications use to identify the * ciphers. List in order of probability. */ static struct ecryptfs_cipher_code_str_map_elem ecryptfs_cipher_code_str_map[] = { @@ -1141,12 +1141,13 @@ ecryptfs_write_metadata_to_contents(struct inode *ecryptfs_inode, static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode, char *page_virt, size_t size) { int rc; - rc = ecryptfs_setxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME, page_virt, - size, 0); + rc = ecryptfs_setxattr(ecryptfs_dentry, ecryptfs_inode, + ECRYPTFS_XATTR_NAME, page_virt, size, 0); return rc; } @@ -1215,8 +1216,8 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, goto out_free; } if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt, - size); + rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, ecryptfs_inode, + virt, size); else rc = ecryptfs_write_metadata_to_contents(ecryptfs_inode, virt, virt_len); @@ -1409,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, * * Common entry point for reading file metadata. From here, we could * retrieve the header information from the header region of the file, - * the xattr region of the file, or some other repostory that is + * the xattr region of the file, or some other repository that is * stored separately from the file itself. The current implementation * supports retrieving the metadata information from the file contents * and from the xattr region. diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 3ec495db7e82..4ba1547bb9ad 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -609,8 +609,8 @@ ssize_t ecryptfs_getxattr_lower(struct dentry *lower_dentry, struct inode *lower_inode, const char *name, void *value, size_t size); int -ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); +ecryptfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); #ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_process_response(struct ecryptfs_daemon *daemon, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 7000b96b783e..ca4e83750214 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -169,9 +169,22 @@ out: return rc; } +static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + /* + * Don't allow mmap on top of file systems that don't support it + * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs + * allows recursive mounting, this will need to be extended. + */ + if (!lower_file->f_op->mmap) + return -ENODEV; + return generic_file_mmap(file, vma); +} + /** * ecryptfs_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. @@ -240,7 +253,7 @@ out: /** * ecryptfs_dir_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. @@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = ecryptfs_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 318b04689d76..9d153b6a1d72 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1001,7 +1001,8 @@ static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, } int -ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, +ecryptfs_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { int rc = 0; @@ -1014,8 +1015,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); - if (!rc && d_really_is_positive(dentry)) - fsstack_copy_attr_all(d_inode(dentry), d_inode(lower_dentry)); + if (!rc && inode) + fsstack_copy_attr_all(inode, d_inode(lower_dentry)); out: return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1698132d0e57..612004495141 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void) struct ecryptfs_cache_info *info; info = &ecryptfs_cache_infos[i]; - if (*(info->cache)) - kmem_cache_destroy(*(info->cache)); + kmem_cache_destroy(*(info->cache)); } } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 148d11b514fb..9c3437c8a5b1 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -442,7 +442,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) if (size < 0) size = 8; put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt); - rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, + rc = lower_inode->i_op->setxattr(lower_dentry, lower_inode, + ECRYPTFS_XATTR_NAME, xattr_virt, size, 0); inode_unlock(lower_inode); if (rc) diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 7fd3b867ce65..7b9e9c1842d5 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -18,10 +18,11 @@ ext2_xattr_security_get(const struct xattr_handler *handler, static int ext2_xattr_security_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, value, size, flags); } diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 0f85705ff519..65049b71af13 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -25,10 +25,11 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler, static int ext2_xattr_trusted_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 1fafd27037cc..fb2f992ae763 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -29,13 +29,14 @@ ext2_xattr_user_get(const struct xattr_handler *handler, static int ext2_xattr_user_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_USER, + return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, value, size, flags); } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 123a7d010efe..a8921112030d 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -22,10 +22,11 @@ ext4_xattr_security_get(const struct xattr_handler *handler, static int ext4_xattr_security_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, value, size, flags); } diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 60652fa24cbc..c7765c735714 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -29,10 +29,11 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler, static int ext4_xattr_trusted_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 17a446ffecd3..ca20e423034b 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -30,12 +30,13 @@ ext4_xattr_user_get(const struct xattr_handler *handler, static int ext4_xattr_user_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, value, size, flags); } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 00ea56797258..e3decae3acfb 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -50,10 +50,11 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, } static int f2fs_xattr_generic_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, const void *value, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); switch (handler->flags) { case F2FS_XATTR_INDEX_USER: @@ -69,7 +70,7 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler, default: return -EINVAL; } - return f2fs_setxattr(d_inode(dentry), handler->flags, name, + return f2fs_setxattr(inode, handler->flags, name, value, size, NULL, flags); } @@ -95,11 +96,10 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler, } static int f2fs_xattr_advise_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, const void *value, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { - struct inode *inode = d_inode(dentry); - if (!inode_owner_or_capable(inode)) return -EPERM; if (value == NULL) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 989a2cef6b76..fe7e83a45eff 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) goto out_free; } inode->i_state |= I_WB_SWITCH; + __iget(inode); spin_unlock(&inode->i_lock); - ihold(inode); isw->inode = inode; atomic_inc(&isw_nr_in_flight); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3078b679fcd1..c8c4f79c7ce1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -887,6 +887,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) put_page(results[i]); } + wake_up_bit(&cookie->flags, 0); + _leave(""); } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b9419058108f..cca7b048c07b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct dentry *newent; bool outarg_valid = true; + fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); + fuse_unlock_inode(dir); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -478,7 +480,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; - if (d_unhashed(entry)) { + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } + fuse_lock_inode(inode); fuse_request_send(fc, req); + fuse_unlock_inode(inode); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); @@ -1719,10 +1723,10 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, return fuse_update_attributes(inode, stat, NULL, NULL); } -static int fuse_setxattr(struct dentry *entry, const char *name, - const void *value, size_t size, int flags) +static int fuse_setxattr(struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(entry); struct fuse_conn *fc = get_fuse_conn(inode); FUSE_ARGS(args); struct fuse_setxattr_in inarg; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index eddbe02c4028..929c383432b0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -110,6 +110,9 @@ struct fuse_inode { /** Miscellaneous bits describing inode state */ unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; }; /** FUSE inode state bits */ @@ -540,6 +543,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +void fuse_unlock_inode(struct inode *inode); +void fuse_lock_inode(struct inode *inode); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ce67668a8e1..9961d8432ce3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); + mutex_init(&fi->mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); + mutex_destroy(&fi->mutex); kfree(fi->forget); call_rcu(&inode->i_rcu, fuse_i_callback); } @@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } +void fuse_lock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_lock(&get_fuse_inode(inode)->mutex); +} + +void fuse_unlock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_unlock(&get_fuse_inode(inode)->mutex); +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PARALLEL_DIROPS) + fc->parallel_dirops = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 37b7bc14c8da..82df36886938 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -140,6 +140,32 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) return nobh_writepage(page, gfs2_get_block_noalloc, wbc); } +/* This is the same as calling block_write_full_page, but it also + * writes pages outside of i_size + */ +int gfs2_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_SHIFT; + unsigned offset; + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + offset = i_size & (PAGE_SIZE-1); + if (page->index == end_index && offset) + zero_user_segment(page, offset, PAGE_SIZE); + + return __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); +} + /** * __gfs2_jdata_writepage - The core of jdata writepage * @page: The page to write @@ -165,7 +191,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w } gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); } - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); + return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc); } /** @@ -180,27 +206,20 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); int ret; - int done_trans = 0; - if (PageChecked(page)) { - if (wbc->sync_mode != WB_SYNC_ALL) - goto out_ignore; - ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); - if (ret) - goto out_ignore; - done_trans = 1; - } - ret = gfs2_writepage_common(page, wbc); - if (ret > 0) - ret = __gfs2_jdata_writepage(page, wbc); - if (done_trans) - gfs2_trans_end(sdp); + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + goto out; + if (PageChecked(page) || current->journal_info) + goto out_ignore; + ret = __gfs2_jdata_writepage(page, wbc); return ret; out_ignore: redirty_page_for_writepage(wbc, page); +out: unlock_page(page); return 0; } diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 30822b148f3e..5173b98ca036 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -117,7 +117,7 @@ static int gfs2_dentry_delete(const struct dentry *dentry) return 0; ginode = GFS2_I(d_inode(dentry)); - if (!ginode->i_iopen_gh.gh_gl) + if (!gfs2_holder_initialized(&ginode->i_iopen_gh)) return 0; if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 4a01f30e9995..e30cc9fb2bef 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -783,12 +783,15 @@ static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { __be64 *hash; + int error; hash = gfs2_dir_get_hash_table(dip); - if (IS_ERR(hash)) - return PTR_ERR(hash); - *leaf_out = be64_to_cpu(*(hash + index)); - return 0; + error = PTR_ERR_OR_ZERO(hash); + + if (!error) + *leaf_out = be64_to_cpu(*(hash + index)); + + return error; } static int get_first_leaf(struct gfs2_inode *dip, u32 index, @@ -798,7 +801,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index, int error; error = get_leaf_nr(dip, index, &leaf_no); - if (!IS_ERR_VALUE(error)) + if (!error) error = get_leaf(dip, leaf_no, bh_out); return error; @@ -1014,7 +1017,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) index = name->hash >> (32 - dip->i_depth); error = get_leaf_nr(dip, index, &leaf_no); - if (IS_ERR_VALUE(error)) + if (error) return error; /* Get the old leaf block */ @@ -1660,7 +1663,8 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, brelse(bh); if (fail_on_exist) return ERR_PTR(-EEXIST); - inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino); + inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, + GFS2_BLKST_FREE /* ignore */); if (!IS_ERR(inode)) GFS2_I(inode)->i_rahead = rahead; return inode; diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index d5bda8513457..a332f3cd925e 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -137,21 +137,10 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; - inode = gfs2_ilookup(sb, inum->no_addr); - if (inode) { - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { - iput(inode); - return ERR_PTR(-ESTALE); - } - goto out_inode; - } - inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino, GFS2_BLKST_DINODE); if (IS_ERR(inode)) return ERR_CAST(inode); - -out_inode: return d_obtain_alias(inode); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e0f98e483aec..320e65e61938 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1098,7 +1098,7 @@ static void do_unflock(struct file *file, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); locks_lock_file_wait(file, fl); - if (fl_gh->gh_gl) { + if (gfs2_holder_initialized(fl_gh)) { gfs2_glock_dq(fl_gh); gfs2_holder_uninit(fl_gh); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 706fd9352f36..3a90b2b5b9bb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -575,7 +575,6 @@ static void delete_work_func(struct work_struct *work) { struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct gfs2_inode *ip; struct inode *inode; u64 no_addr = gl->gl_name.ln_number; @@ -585,13 +584,7 @@ static void delete_work_func(struct work_struct *work) if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) goto out; - ip = gl->gl_object; - /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ - - if (ip) - inode = gfs2_ilookup(sdp->sd_vfs, no_addr); - else - inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); + inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); if (inode && !IS_ERR(inode)) { d_prune_aliases(inode); iput(inode); @@ -808,7 +801,7 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) { put_pid(gh->gh_owner_pid); gfs2_glock_put(gh->gh_gl); - gh->gh_gl = NULL; + gfs2_holder_mark_uninitialized(gh); gh->gh_ip = 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 46ab67fc16da..ab1ef322f7a5 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -247,4 +247,14 @@ extern void gfs2_unregister_debugfs(void); extern const struct lm_lockops gfs2_dlm_ops; +static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) +{ + gh->gh_gl = NULL; +} + +static inline bool gfs2_holder_initialized(struct gfs2_holder *gh) +{ + return gh->gh_gl; +} + #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 21dc784f66c2..e0621cacf134 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -37,9 +37,35 @@ #include "super.h" #include "glops.h" -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) +static int iget_test(struct inode *inode, void *opaque) { - return ilookup(sb, (unsigned long)no_addr); + u64 no_addr = *(u64 *)opaque; + + return GFS2_I(inode)->i_no_addr == no_addr; +} + +static int iget_set(struct inode *inode, void *opaque) +{ + u64 no_addr = *(u64 *)opaque; + + GFS2_I(inode)->i_no_addr = no_addr; + inode->i_ino = no_addr; + return 0; +} + +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) +{ + struct inode *inode; + +repeat: + inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr); + if (!inode) + return inode; + if (is_bad_inode(inode)) { + iput(inode); + goto repeat; + } + return inode; } /** @@ -78,26 +104,37 @@ static void gfs2_set_iop(struct inode *inode) /** * gfs2_inode_lookup - Lookup an inode * @sb: The super block - * @no_addr: The inode number * @type: The type of the inode + * @no_addr: The inode number + * @no_formal_ino: The inode generation number + * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; + * GFS2_BLKST_FREE do indicate not to verify) + * + * If @type is DT_UNKNOWN, the inode type is fetched from disk. + * + * If @blktype is anything other than GFS2_BLKST_FREE (which is used as a + * placeholder because it doesn't otherwise make sense), the on-disk block type + * is verified to be @blktype. * * Returns: A VFS inode, or an error */ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, - u64 no_addr, u64 no_formal_ino) + u64 no_addr, u64 no_formal_ino, + unsigned int blktype) { struct inode *inode; struct gfs2_inode *ip; struct gfs2_glock *io_gl = NULL; + struct gfs2_holder i_gh; int error; - inode = iget_locked(sb, (unsigned long)no_addr); + gfs2_holder_mark_uninitialized(&i_gh); + inode = gfs2_iget(sb, no_addr); if (!inode) return ERR_PTR(-ENOMEM); ip = GFS2_I(inode); - ip->i_no_addr = no_addr; if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -112,10 +149,29 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail_put; + if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { + /* + * The GL_SKIP flag indicates to skip reading the inode + * block. We read the inode with gfs2_inode_refresh + * after possibly checking the block type. + */ + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, + GL_SKIP, &i_gh); + if (error) + goto fail_put; + + if (blktype != GFS2_BLKST_FREE) { + error = gfs2_check_blk_type(sdp, no_addr, + blktype); + if (error) + goto fail_put; + } + } + set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) - goto fail_iopen; + goto fail_put; ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); @@ -134,6 +190,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, unlock_new_inode(inode); } + if (gfs2_holder_initialized(&i_gh)) + gfs2_glock_dq_uninit(&i_gh); return inode; fail_refresh: @@ -141,10 +199,11 @@ fail_refresh: ip->i_iopen_gh.gh_gl->gl_object = NULL; gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); -fail_iopen: +fail_put: if (io_gl) gfs2_glock_put(io_gl); -fail_put: + if (gfs2_holder_initialized(&i_gh)) + gfs2_glock_dq_uninit(&i_gh); ip->i_gl->gl_object = NULL; fail: iget_failed(inode); @@ -155,23 +214,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype) { struct super_block *sb = sdp->sd_vfs; - struct gfs2_holder i_gh; - struct inode *inode = NULL; + struct inode *inode; int error; - /* Must not read in block until block type is verified */ - error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, - LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); - if (error) - return ERR_PTR(error); - - error = gfs2_check_blk_type(sdp, no_addr, blktype); - if (error) - goto fail; - - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype); if (IS_ERR(inode)) - goto fail; + return inode; /* Two extra checks for NFS only */ if (no_formal_ino) { @@ -182,16 +230,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, error = -EIO; if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) goto fail_iput; - - error = 0; } + return inode; -fail: - gfs2_glock_dq_uninit(&i_gh); - return error ? ERR_PTR(error) : inode; fail_iput: iput(inode); - goto fail; + return ERR_PTR(error); } @@ -236,8 +280,8 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct gfs2_holder d_gh; int error = 0; struct inode *inode = NULL; - int unlock = 0; + gfs2_holder_mark_uninitialized(&d_gh); if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -252,7 +296,6 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) return ERR_PTR(error); - unlock = 1; } if (!is_root) { @@ -265,7 +308,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, if (IS_ERR(inode)) error = PTR_ERR(inode); out: - if (unlock) + if (gfs2_holder_initialized(&d_gh)) gfs2_glock_dq_uninit(&d_gh); if (error == -ENOENT) return NULL; @@ -1189,7 +1232,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *d; bool excl = !!(flags & O_EXCL); - if (!d_unhashed(dentry)) + if (!d_in_lookup(dentry)) goto skip_lookup; d = __gfs2_lookup(dir, dentry, file, opened); @@ -1309,7 +1352,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(d_inode(odentry)); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; + struct gfs2_holder ghs[5], r_gh; struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; @@ -1317,6 +1360,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, unsigned int x; int error; + gfs2_holder_mark_uninitialized(&r_gh); if (d_really_is_positive(ndentry)) { nip = GFS2_I(d_inode(ndentry)); if (ip == nip) @@ -1506,7 +1550,7 @@ out_gunlock: gfs2_holder_uninit(ghs + x); } out_gunlock_r: - if (r_gh.gh_gl) + if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: return error; @@ -1532,13 +1576,14 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, struct gfs2_inode *oip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = GFS2_I(ndentry->d_inode); struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; + struct gfs2_holder ghs[5], r_gh; unsigned int num_gh; unsigned int x; umode_t old_mode = oip->i_inode.i_mode; umode_t new_mode = nip->i_inode.i_mode; int error; + gfs2_holder_mark_uninitialized(&r_gh); error = gfs2_rindex_update(sdp); if (error) return error; @@ -1646,7 +1691,7 @@ out_gunlock: gfs2_holder_uninit(ghs + x); } out_gunlock_r: - if (r_gh.gh_gl) + if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: return error; @@ -1743,9 +1788,8 @@ int gfs2_permission(struct inode *inode, int mask) struct gfs2_inode *ip; struct gfs2_holder i_gh; int error; - int unlock = 0; - + gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { if (mask & MAY_NOT_BLOCK) @@ -1753,14 +1797,13 @@ int gfs2_permission(struct inode *inode, int mask) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; - unlock = 1; } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EACCES; else error = generic_permission(inode, mask); - if (unlock) + if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); return error; @@ -1932,17 +1975,16 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; - int unlock = 0; + gfs2_holder_mark_uninitialized(&gh); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); if (error) return error; - unlock = 1; } generic_fillattr(inode, stat); - if (unlock) + if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); return 0; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e1af0d4aa308..7710dfd3af35 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -94,11 +94,11 @@ err: } extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino); + u64 no_addr, u64 no_formal_ino, + unsigned int blktype); extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); -extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); extern int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index d5369a109781..8e3ba20d5e9d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -535,9 +535,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) return 0; - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { blkno = be64_to_cpu(*ptr++); jd->jd_found_blocks++; @@ -693,7 +693,7 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, offset = sizeof(struct gfs2_log_descriptor); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { error = gfs2_replay_read_block(jd, start, &bh); if (error) return error; @@ -762,7 +762,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, __be64 *ptr, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_glock *gl = ip->i_gl; unsigned int blks = be32_to_cpu(ld->ld_data1); struct buffer_head *bh_log, *bh_ip; @@ -773,8 +772,8 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) return 0; - gfs2_replay_incr_blk(sdp, &start); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + gfs2_replay_incr_blk(jd, &start); + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { blkno = be64_to_cpu(*ptr++); esc = be64_to_cpu(*ptr++); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index f99f8e94de3f..74fd0139e6c2 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -45,6 +45,7 @@ static void gfs2_init_inode_once(void *foo) memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_hash_cache = NULL; + gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); } static void gfs2_init_glock_once(void *foo) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 45463600fb81..b8f6fc9513ef 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -454,7 +454,8 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, struct dentry *dentry; struct inode *inode; - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, + GFS2_BLKST_FREE /* ignore */); if (IS_ERR(inode)) { fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); return PTR_ERR(inode); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ce7d69a2fdc0..6c657b202501 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -883,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); - ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); + ghs = kmalloc(num_qd * sizeof(struct gfs2_holder), GFP_NOFS); if (!ghs) return -ENOMEM; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 1b645773c98e..113b6095a58d 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -338,7 +338,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, struct gfs2_log_header_host lh; error = get_log_header(jd, start, &lh); if (!error) { - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); brelse(bh); continue; } @@ -360,7 +360,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, } while (length--) - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); brelse(bh); } @@ -390,7 +390,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; lblock = head->lh_blkno; - gfs2_replay_incr_blk(sdp, &lblock); + gfs2_replay_incr_blk(jd, &lblock); bh_map.b_size = 1 << ip->i_inode.i_blkbits; error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 6142836cce96..11fdfab4bf99 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -14,9 +14,9 @@ extern struct workqueue_struct *gfs_recovery_wq; -static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) +static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, unsigned int *blk) { - if (++*blk == sdp->sd_jdesc->jd_blocks) + if (++*blk == jd->jd_blocks) *blk = 0; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5bd216901e89..86ccc0159393 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -658,6 +658,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); + BUG_ON(rs->rs_free); spin_unlock(&rgd->rd_rsspin); } } @@ -671,10 +672,8 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) { + if ((wcount == NULL) || (atomic_read(wcount) <= 1)) gfs2_rs_deltree(&ip->i_res); - BUG_ON(ip->i_res.rs_free); - } up_write(&ip->i_rw_mutex); gfs2_qa_delete(ip, wcount); } @@ -722,6 +721,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + rgd->rd_bits = NULL; return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } @@ -916,9 +916,6 @@ static int read_rindex_entry(struct gfs2_inode *ip) if (error) goto fail; - rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; - rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) @@ -926,14 +923,20 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_lock(&sdp->sd_rindex_spin); error = rgd_insert(rgd); spin_unlock(&sdp->sd_rindex_spin); - if (!error) + if (!error) { + rgd->rd_gl->gl_object = rgd; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + + rgd->rd_length) * bsize) - 1; return 0; + } error = 0; /* someone else read in the rgrp; free it and ignore it */ gfs2_glock_put(rgd->rd_gl); fail: kfree(rgd->rd_bits); + rgd->rd_bits = NULL; kmem_cache_free(gfs2_rgrpd_cachep, rgd); return error; } @@ -2096,7 +2099,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = &ip->i_res; - if (rs->rs_rgd_gh.gh_gl) + if (gfs2_holder_initialized(&rs->rs_rgd_gh)) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); } @@ -2596,7 +2599,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) { unsigned int x; - rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), + rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9b2ff353e45f..3a7e60bb39f8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -855,7 +855,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); - if (freeze_gh.gh_gl) + if (gfs2_holder_initialized(&freeze_gh)) gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); @@ -1033,7 +1033,7 @@ static int gfs2_unfreeze(struct super_block *sb) mutex_lock(&sdp->sd_freeze_mutex); if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || - sdp->sd_freeze_gh.gh_gl == NULL) { + !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { mutex_unlock(&sdp->sd_freeze_mutex); return 0; } @@ -1084,9 +1084,11 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host int error = 0, err; memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); - gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); + gha = kmalloc(slots * sizeof(struct gfs2_holder), GFP_KERNEL); if (!gha) return -ENOMEM; + for (x = 0; x < slots; x++) + gfs2_holder_mark_uninitialized(gha + x); rgd_next = gfs2_rgrpd_get_first(sdp); @@ -1096,7 +1098,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host for (x = 0; x < slots; x++) { gh = gha + x; - if (gh->gh_gl && gfs2_glock_poll(gh)) { + if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) { err = gfs2_glock_wait(gh); if (err) { gfs2_holder_uninit(gh); @@ -1109,7 +1111,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host } } - if (gh->gh_gl) + if (gfs2_holder_initialized(gh)) done = 0; else if (rgd_next && !error) { error = gfs2_glock_nq_init(rgd_next->rd_gl, @@ -1304,9 +1306,11 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { + if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && + inode->i_nlink && + gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); } return generic_drop_inode(inode); @@ -1551,7 +1555,7 @@ static void gfs2_evict_inode(struct inode *inode) goto out_truncate; } - if (ip->i_iopen_gh.gh_gl && + if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); @@ -1610,7 +1614,7 @@ out_unlock: if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (ip->i_iopen_gh.gh_gl) { + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); @@ -1632,7 +1636,7 @@ out: gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { ip->i_iopen_gh.gh_gl->gl_object = NULL; ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index f42ab53bd30d..3a2853504084 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1251,10 +1251,10 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, } static int gfs2_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c index 064f92f17efc..d9a86919fdf6 100644 --- a/fs/hfs/attr.c +++ b/fs/hfs/attr.c @@ -13,10 +13,10 @@ #include "hfs_fs.h" #include "btree.h" -int hfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int hfs_setxattr(struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); struct hfs_find_data fd; hfs_cat_rec rec; struct hfs_cat_file *file; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index fa3eed86837c..ee2f385811c8 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -212,7 +212,7 @@ extern void hfs_evict_inode(struct inode *); extern void hfs_delete_inode(struct inode *); /* attr.c */ -extern int hfs_setxattr(struct dentry *dentry, const char *name, +extern int hfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags); extern ssize_t hfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 4f118d282a7a..d37bb88dc746 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -424,7 +424,7 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len) return len; } -int hfsplus_setxattr(struct dentry *dentry, const char *name, +int hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags, const char *prefix, size_t prefixlen) { @@ -437,8 +437,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, return -ENOMEM; strcpy(xattr_name, prefix); strcpy(xattr_name + prefixlen, name); - res = __hfsplus_setxattr(d_inode(dentry), xattr_name, value, size, - flags); + res = __hfsplus_setxattr(inode, xattr_name, value, size, flags); kfree(xattr_name); return res; } @@ -864,8 +863,9 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler, } static int hfsplus_osx_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { /* * Don't allow setting properly prefixed attributes @@ -880,7 +880,7 @@ static int hfsplus_osx_setxattr(const struct xattr_handler *handler, * creates), so we pass the name through unmodified (after * ensuring it doesn't conflict with another namespace). */ - return __hfsplus_setxattr(d_inode(dentry), name, buffer, size, flags); + return __hfsplus_setxattr(inode, name, buffer, size, flags); } const struct xattr_handler hfsplus_xattr_osx_handler = { diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index d04ba6f58df2..68f6b539371f 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h @@ -21,7 +21,7 @@ extern const struct xattr_handler *hfsplus_xattr_handlers[]; int __hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags); -int hfsplus_setxattr(struct dentry *dentry, const char *name, +int hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags, const char *prefix, size_t prefixlen); diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index ae2ca8c2e335..37b3efa733ef 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -23,10 +23,11 @@ static int hfsplus_security_getxattr(const struct xattr_handler *handler, } static int hfsplus_security_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return hfsplus_setxattr(dentry, name, buffer, size, flags, + return hfsplus_setxattr(inode, name, buffer, size, flags, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index eae2947060aa..94519d6c627d 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -21,10 +21,11 @@ static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, } static int hfsplus_trusted_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return hfsplus_setxattr(dentry, name, buffer, size, flags, + return hfsplus_setxattr(inode, name, buffer, size, flags, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index 3c9eec3e4c7b..fae6c0ea0030 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -21,10 +21,11 @@ static int hfsplus_user_getxattr(const struct xattr_handler *handler, } static int hfsplus_user_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return hfsplus_setxattr(dentry, name, buffer, size, flags, + return hfsplus_setxattr(inode, name, buffer, size, flags, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 458cf463047b..82067ca22f2b 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -15,6 +15,7 @@ #include <linux/sched.h> #include <linux/bitmap.h> #include <linux/slab.h> +#include <linux/seq_file.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ @@ -453,10 +454,6 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int lowercase, eas, chk, errs, chkdsk, timeshift; int o; struct hpfs_sb_info *sbi = hpfs_sb(s); - char *new_opts = kstrdup(data, GFP_KERNEL); - - if (!new_opts) - return -ENOMEM; sync_filesystem(s); @@ -493,17 +490,44 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s, 1); - replace_mount_options(s, new_opts); - hpfs_unlock(s); return 0; out_err: hpfs_unlock(s); - kfree(new_opts); return -EINVAL; } +static int hpfs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct hpfs_sb_info *sbi = hpfs_sb(root->d_sb); + + seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->sb_uid)); + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->sb_gid)); + seq_printf(seq, ",umask=%03o", (~sbi->sb_mode & 0777)); + if (sbi->sb_lowercase) + seq_printf(seq, ",case=lower"); + if (!sbi->sb_chk) + seq_printf(seq, ",check=none"); + if (sbi->sb_chk == 2) + seq_printf(seq, ",check=strict"); + if (!sbi->sb_err) + seq_printf(seq, ",errors=continue"); + if (sbi->sb_err == 2) + seq_printf(seq, ",errors=panic"); + if (!sbi->sb_chkdsk) + seq_printf(seq, ",chkdsk=no"); + if (sbi->sb_chkdsk == 2) + seq_printf(seq, ",chkdsk=always"); + if (!sbi->sb_eas) + seq_printf(seq, ",eas=no"); + if (sbi->sb_eas == 1) + seq_printf(seq, ",eas=ro"); + if (sbi->sb_timeshift) + seq_printf(seq, ",timeshift=%d", sbi->sb_timeshift); + return 0; +} + /* Super operations */ static const struct super_operations hpfs_sops = @@ -514,7 +538,7 @@ static const struct super_operations hpfs_sops = .put_super = hpfs_put_super, .statfs = hpfs_statfs, .remount_fs = hpfs_remount_fs, - .show_options = generic_show_options, + .show_options = hpfs_show_options, }; static int hpfs_fill_super(struct super_block *s, void *options, int silent) @@ -537,8 +561,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) int o; - save_mount_options(s, options); - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) { return -ENOMEM; diff --git a/fs/internal.h b/fs/internal.h index b71deeecea17..f57ced528cde 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool); extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); +extern struct dentry *d_alloc_cursor(struct dentry *); /* * read_write.c diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b31852f76f46..e3ca4b4cac84 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) BUG_ON(size & (size-1)); /* Must be a power of 2 */ - flags |= __GFP_REPEAT; - if (size == PAGE_SIZE) - ptr = (void *)__get_free_pages(flags, 0); - else if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - ptr = (void *)__get_free_pages(flags, order); - else - ptr = vmalloc(size); - } else + if (size < PAGE_SIZE) ptr = kmem_cache_alloc(get_slab(size), flags); + else + ptr = (void *)__get_free_pages(flags, get_order(size)); /* Check alignment; SLUB has gotten this wrong in the past, * and this can lead to user data corruption! */ @@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) void jbd2_free(void *ptr, size_t size) { - if (size == PAGE_SIZE) { - free_pages((unsigned long)ptr, 0); - return; - } - if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - free_pages((unsigned long)ptr, order); - else - vfree(ptr); - return; - } - kmem_cache_free(get_slab(size), ptr); + if (size < PAGE_SIZE) + kmem_cache_free(get_slab(size), ptr); + else + free_pages((unsigned long)ptr, get_order(size)); }; /* diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index 3ed9a4b49778..c2332e30f218 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -57,10 +57,11 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler, } static int jffs2_security_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY, + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); } diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index 4ebecff1d922..5d6030826c52 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -25,10 +25,11 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler, } static int jffs2_trusted_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED, + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); } diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index bce249e1b277..9d027b4abcf9 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -25,10 +25,11 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler, } static int jffs2_user_setxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) { - return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER, + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index beb182b503b3..0bf3c33aedff 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -943,11 +943,10 @@ static int jfs_xattr_get(const struct xattr_handler *handler, } static int jfs_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); - name = xattr_full_name(handler, name); return __jfs_xattr_set(inode, name, value, size, flags); } @@ -962,11 +961,10 @@ static int jfs_xattr_get_os2(const struct xattr_handler *handler, } static int jfs_xattr_set_os2(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); - if (is_known_namespace(name)) return -EOPNOTSUPP; return __jfs_xattr_set(inode, name, value, size, flags); diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 1719649d7ad7..63b925d5ba1e 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -160,10 +160,11 @@ static int kernfs_node_setsecdata(struct kernfs_node *kn, void **secdata, return 0; } -int kernfs_iop_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int kernfs_iop_setxattr(struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct kernfs_node *kn = dentry->d_fsdata; + struct kernfs_node *kn = inode->i_private; struct kernfs_iattrs *attrs; void *secdata; int error; @@ -175,11 +176,11 @@ int kernfs_iop_setxattr(struct dentry *dentry, const char *name, if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - error = security_inode_setsecurity(d_inode(dentry), suffix, + error = security_inode_setsecurity(inode, suffix, value, size, flags); if (error) return error; - error = security_inode_getsecctx(d_inode(dentry), + error = security_inode_getsecctx(inode, &secdata, &secdata_len); if (error) return error; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 45c9192c276e..37159235ac10 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -81,7 +81,8 @@ int kernfs_iop_permission(struct inode *inode, int mask); int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr); int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value, +int kernfs_iop_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); int kernfs_iop_removexattr(struct dentry *dentry, const char *name); ssize_t kernfs_iop_getxattr(struct dentry *dentry, struct inode *inode, diff --git a/fs/libfs.c b/fs/libfs.c index 8765ff1adc07..74dc8b9e7f53 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = QSTR_INIT(".", 1); - - file->private_data = d_alloc(file->f_path.dentry, &cursor_name); + file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } @@ -86,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file) } EXPORT_SYMBOL(dcache_dir_close); +/* parent is locked at least shared */ +static struct dentry *next_positive(struct dentry *parent, + struct list_head *from, + int count) +{ + unsigned *seq = &parent->d_inode->i_dir_seq, n; + struct dentry *res; + struct list_head *p; + bool skipped; + int i; + +retry: + i = count; + skipped = false; + n = smp_load_acquire(seq) & ~1; + res = NULL; + rcu_read_lock(); + for (p = from->next; p != &parent->d_subdirs; p = p->next) { + struct dentry *d = list_entry(p, struct dentry, d_child); + if (!simple_positive(d)) { + skipped = true; + } else if (!--i) { + res = d; + break; + } + } + rcu_read_unlock(); + if (skipped) { + smp_rmb(); + if (unlikely(*seq != n)) + goto retry; + } + return res; +} + +static void move_cursor(struct dentry *cursor, struct list_head *after) +{ + struct dentry *parent = cursor->d_parent; + unsigned n, *seq = &parent->d_inode->i_dir_seq; + spin_lock(&parent->d_lock); + for (;;) { + n = *seq; + if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) + break; + cpu_relax(); + } + __list_del(cursor->d_child.prev, cursor->d_child.next); + if (after) + list_add(&cursor->d_child, after); + else + list_add_tail(&cursor->d_child, &parent->d_subdirs); + smp_store_release(seq, n + 2); + spin_unlock(&parent->d_lock); +} + loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; @@ -101,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { - struct list_head *p; struct dentry *cursor = file->private_data; + struct dentry *to; loff_t n = file->f_pos - 2; - spin_lock(&dentry->d_lock); - /* d_lock not required for cursor */ - list_del(&cursor->d_child); - p = dentry->d_subdirs.next; - while (n && p != &dentry->d_subdirs) { - struct dentry *next; - next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (simple_positive(next)) - n--; - spin_unlock(&next->d_lock); - p = p->next; - } - list_add_tail(&cursor->d_child, p); - spin_unlock(&dentry->d_lock); + inode_lock_shared(dentry->d_inode); + to = next_positive(dentry, &dentry->d_subdirs, n); + move_cursor(cursor, to ? &to->d_child : NULL); + inode_unlock_shared(dentry->d_inode); } } return offset; @@ -142,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p = &cursor->d_child; + struct dentry *next; + bool moved = false; if (!dir_emit_dots(file, ctx)) return 0; - spin_lock(&dentry->d_lock); - if (ctx->pos == 2) - list_move(q, &dentry->d_subdirs); - for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (!simple_positive(next)) { - spin_unlock(&next->d_lock); - continue; - } - - spin_unlock(&next->d_lock); - spin_unlock(&dentry->d_lock); + if (ctx->pos == 2) + p = &dentry->d_subdirs; + while ((next = next_positive(dentry, p, 1)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) - return 0; - spin_lock(&dentry->d_lock); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - /* next is still alive */ - list_move(q, p); - spin_unlock(&next->d_lock); - p = q; + break; + moved = true; + p = &next->d_child; ctx->pos++; } - spin_unlock(&dentry->d_lock); + if (moved) + move_cursor(cursor, p); return 0; } EXPORT_SYMBOL(dcache_readdir); @@ -1118,8 +1149,9 @@ static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; } -static int empty_dir_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +static int empty_dir_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { return -EOPNOTSUPP; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 154a107cd376..fc4084ef4736 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_svc_exit_thread(void) +static void lockd_unregister_notifiers(void) { unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif +} + +static void lockd_svc_exit_thread(void) +{ + lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -462,7 +467,7 @@ int lockd_up(struct net *net) * Note: svc_serv structures have an initial use count of 1, * so we exit through here on both success and failure. */ -err_net: +err_put: svc_destroy(serv); err_create: mutex_unlock(&nlmsvc_mutex); @@ -470,7 +475,9 @@ err_create: err_start: lockd_down_net(serv, net); - goto err_net; +err_net: + lockd_unregister_notifiers(); + goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); diff --git a/fs/locks.c b/fs/locks.c index 7c5f91be9b65..ee1b15f6fc13 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr { struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; diff --git a/fs/namei.c b/fs/namei.c index 15b124c18ed8..70580ab1445c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -35,6 +35,7 @@ #include <linux/fs_struct.h> #include <linux/posix_acl.h> #include <linux/hash.h> +#include <linux/bitops.h> #include <asm/uaccess.h> #include "internal.h" @@ -1415,21 +1416,28 @@ static void follow_mount(struct path *path) } } +static int path_parent_directory(struct path *path) +{ + struct dentry *old = path->dentry; + /* rare case of legitimate dget_parent()... */ + path->dentry = dget_parent(path->dentry); + dput(old); + if (unlikely(!path_connected(path))) + return -ENOENT; + return 0; +} + static int follow_dotdot(struct nameidata *nd) { while(1) { - struct dentry *old = nd->path.dentry; - if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; } if (nd->path.dentry != nd->path.mnt->mnt_root) { - /* rare case of legitimate dget_parent()... */ - nd->path.dentry = dget_parent(nd->path.dentry); - dput(old); - if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + int ret = path_parent_directory(&nd->path); + if (ret) + return ret; break; } if (!follow_up(&nd->path)) @@ -1797,74 +1805,144 @@ static int walk_component(struct nameidata *nd, int flags) #include <asm/word-at-a-time.h> -#ifdef CONFIG_64BIT +#ifdef HASH_MIX -static inline unsigned int fold_hash(unsigned long hash) -{ - return hash_64(hash, 32); -} +/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */ + +#elif defined(CONFIG_64BIT) +/* + * Register pressure in the mixing function is an issue, particularly + * on 32-bit x86, but almost any function requires one state value and + * one temporary. Instead, use a function designed for two state values + * and no temporaries. + * + * This function cannot create a collision in only two iterations, so + * we have two iterations to achieve avalanche. In those two iterations, + * we have six layers of mixing, which is enough to spread one bit's + * influence out to 2^6 = 64 state bits. + * + * Rotate constants are scored by considering either 64 one-bit input + * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the + * probability of that delta causing a change to each of the 128 output + * bits, using a sample of random initial states. + * + * The Shannon entropy of the computed probabilities is then summed + * to produce a score. Ideally, any input change has a 50% chance of + * toggling any given output bit. + * + * Mixing scores (in bits) for (12,45): + * Input delta: 1-bit 2-bit + * 1 round: 713.3 42542.6 + * 2 rounds: 2753.7 140389.8 + * 3 rounds: 5954.1 233458.2 + * 4 rounds: 7862.6 256672.2 + * Perfect: 8192 258048 + * (64*128) (64*63/2 * 128) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol64(x,12),\ + x += y, y = rol64(y,45),\ + y *= 9 ) /* - * This is George Marsaglia's XORSHIFT generator. - * It implements a maximum-period LFSR in only a few - * instructions. It also has the property (required - * by hash_name()) that mix_hash(0) = 0. + * Fold two longs into one 32-bit hash value. This must be fast, but + * latency isn't quite as critical, as there is a fair bit of additional + * work done before the hash value is used. */ -static inline unsigned long mix_hash(unsigned long hash) +static inline unsigned int fold_hash(unsigned long x, unsigned long y) { - hash ^= hash << 13; - hash ^= hash >> 7; - hash ^= hash << 17; - return hash; + y ^= x * GOLDEN_RATIO_64; + y *= GOLDEN_RATIO_64; + return y >> 32; } #else /* 32-bit case */ -#define fold_hash(x) (x) +/* + * Mixing scores (in bits) for (7,20): + * Input delta: 1-bit 2-bit + * 1 round: 330.3 9201.6 + * 2 rounds: 1246.4 25475.4 + * 3 rounds: 1907.1 31295.1 + * 4 rounds: 2042.3 31718.6 + * Perfect: 2048 31744 + * (32*64) (32*31/2 * 64) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol32(x, 7),\ + x += y, y = rol32(y,20),\ + y *= 9 ) -static inline unsigned long mix_hash(unsigned long hash) +static inline unsigned int fold_hash(unsigned long x, unsigned long y) { - hash ^= hash << 13; - hash ^= hash >> 17; - hash ^= hash << 5; - return hash; + /* Use arch-optimized multiply if one exists */ + return __hash_32(y ^ __hash_32(x)); } #endif -unsigned int full_name_hash(const unsigned char *name, unsigned int len) +/* + * Return the hash of a string of known length. This is carfully + * designed to match hash_name(), which is the more critical function. + * In particular, we must end by hashing a final word containing 0..7 + * payload bytes, to match the way that hash_name() iterates until it + * finds the delimiter after the name. + */ +unsigned int full_name_hash(const char *name, unsigned int len) { - unsigned long a, hash = 0; + unsigned long a, x = 0, y = 0; for (;;) { + if (!len) + goto done; a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; - hash = mix_hash(hash + a); + HASH_MIX(x, y, a); name += sizeof(unsigned long); len -= sizeof(unsigned long); - if (!len) - goto done; } - hash += a & bytemask_from_count(len); + x ^= a & bytemask_from_count(len); done: - return fold_hash(hash); + return fold_hash(x, y); } EXPORT_SYMBOL(full_name_hash); +/* Return the "hash_len" (hash and length) of a null-terminated string */ +u64 hashlen_string(const char *name) +{ + unsigned long a = 0, x = 0, y = 0, adata, mask, len; + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + + len = -sizeof(unsigned long); + do { + HASH_MIX(x, y, a); + len += sizeof(unsigned long); + a = load_unaligned_zeropad(name+len); + } while (!has_zero(a, &adata, &constants)); + + adata = prep_zero_mask(a, adata, &constants); + mask = create_zero_mask(adata); + x ^= a & zero_bytemask(mask); + + return hashlen_create(fold_hash(x, y), len + find_zero(mask)); +} +EXPORT_SYMBOL(hashlen_string); + /* * Calculate the length and hash of the path component, and * return the "hash_len" as the result. */ static inline u64 hash_name(const char *name) { - unsigned long a, b, adata, bdata, mask, hash, len; + unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - hash = a = 0; len = -sizeof(unsigned long); do { - hash = mix_hash(hash + a); + HASH_MIX(x, y, a); len += sizeof(unsigned long); a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); @@ -1872,25 +1950,40 @@ static inline u64 hash_name(const char *name) adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); - mask = create_zero_mask(adata | bdata); + x ^= a & zero_bytemask(mask); - hash += a & zero_bytemask(mask); - len += find_zero(mask); - return hashlen_create(fold_hash(hash), len); + return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } -#else +#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ -unsigned int full_name_hash(const unsigned char *name, unsigned int len) +/* Return the hash of a string of known length */ +unsigned int full_name_hash(const char *name, unsigned int len) { unsigned long hash = init_name_hash(); while (len--) - hash = partial_name_hash(*name++, hash); + hash = partial_name_hash((unsigned char)*name++, hash); return end_name_hash(hash); } EXPORT_SYMBOL(full_name_hash); +/* Return the "hash_len" (hash and length) of a null-terminated string */ +u64 hashlen_string(const char *name) +{ + unsigned long hash = init_name_hash(); + unsigned long len = 0, c; + + c = (unsigned char)*name; + while (c) { + len++; + hash = partial_name_hash(c, hash); + c = (unsigned char)name[len]; + } + return hashlen_create(end_name_hash(hash), len); +} +EXPORT_SYMBOL(hashlen_string); + /* * We know there's a real path component here of at least * one character. @@ -1934,7 +2027,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) int type; err = may_lookup(nd); - if (err) + if (err) return err; hash_len = hash_name(name); @@ -2428,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name, } EXPORT_SYMBOL(lookup_one_len_unlocked); +#ifdef CONFIG_UNIX98_PTYS +int path_pts(struct path *path) +{ + /* Find something mounted on "pts" in the same directory as + * the input path. + */ + struct dentry *child, *parent; + struct qstr this; + int ret; + + ret = path_parent_directory(path); + if (ret) + return ret; + + parent = path->dentry; + this.name = "pts"; + this.len = 3; + child = d_hash_and_lookup(parent, &this); + if (!child) + return -ENOENT; + + path->dentry = child; + dput(parent); + follow_mount(path); + return 0; +} +#endif + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { @@ -2909,9 +3030,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; + if (unlikely(d_is_negative(dentry))) { + error = -ENOENT; + } else { + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; + } } } dput(dentry); @@ -3080,9 +3205,7 @@ static int do_last(struct nameidata *nd, int acc_mode = op->acc_mode; unsigned seq; struct inode *inode; - struct path save_parent = { .dentry = NULL, .mnt = NULL }; struct path path; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -3125,7 +3248,6 @@ static int do_last(struct nameidata *nd, return -EISDIR; } -retry_lookup: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) @@ -3177,6 +3299,10 @@ retry_lookup: got_write = false; } + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; + if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; @@ -3192,10 +3318,6 @@ retry_lookup: return -EEXIST; } - error = follow_managed(&path, nd); - if (unlikely(error < 0)) - return error; - seq = 0; /* out of RCU mode, so the value doesn't matter */ inode = d_backing_inode(path.dentry); finish_lookup: @@ -3206,23 +3328,14 @@ finish_lookup: if (unlikely(error)) return error; - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { - path_to_nameidata(&path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path.mnt); - nd->path.dentry = path.dentry; - - } + path_to_nameidata(&path, nd); nd->inode = inode; nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ finish_open: error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return error; - } audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) @@ -3245,13 +3358,9 @@ finish_open_created: goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); - if (!error) { - *opened |= FILE_OPENED; - } else { - if (error == -EOPENSTALE) - goto stale_open; + if (error) goto out; - } + *opened |= FILE_OPENED; opened: error = open_check_o_direct(file); if (!error) @@ -3267,26 +3376,7 @@ out: } if (got_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); return error; - -stale_open: - /* If no saved parent or already retried then can't retry */ - if (!save_parent.dentry || retried) - goto out; - - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (got_write) { - mnt_drop_write(nd->path.mnt); - got_write = false; - } - retried = true; - goto retry_lookup; } static int do_tmpfile(struct nameidata *nd, unsigned flags, diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691b4355..419f746d851d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry) goto out_unlock; lock_mount_hash(); + event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { @@ -2409,8 +2410,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } @@ -3245,6 +3248,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC) mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC); + /* Don't miss readonly hidden in the superblock flags */ + if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ @@ -3271,7 +3278,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aaf7bd0cbae2..19d93d0cd400 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc, static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { + struct inode *inode; struct nfs_inode *nfsi; if (d_really_is_negative(dentry)) return 0; - nfsi = NFS_I(d_inode(dentry)); + inode = d_inode(dentry); + if (is_bad_inode(inode) || NFS_STALE(inode)) + return 0; + + nfsi = NFS_I(inode); if (entry->fattr->fileid == nfsi->fileid) return 1; if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) @@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; - struct dentry *parent; struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; @@ -1393,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(label)) goto out; - parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); @@ -1482,11 +1485,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode, int *opened) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; unsigned int lookup_flags = 0; + bool switched = false; int err; /* Expect a negative dentry */ @@ -1501,7 +1506,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { - if (!d_unhashed(dentry)) { + if (!d_in_lookup(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup @@ -1525,6 +1530,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, attr.ia_size = 0; } + if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) { + d_drop(dentry); + switched = true; + dentry = d_alloc_parallel(dentry->d_parent, + &dentry->d_name, &wq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (unlikely(!d_in_lookup(dentry))) + return finish_no_open(file, dentry); + } + ctx = create_nfs_open_context(dentry, open_flags); err = PTR_ERR(ctx); if (IS_ERR(ctx)) @@ -1536,9 +1552,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); + d_drop(dentry); switch (err) { case -ENOENT: - d_drop(dentry); d_add(dentry, NULL); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; @@ -1560,14 +1576,23 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); out: + if (unlikely(switched)) { + d_lookup_done(dentry); + dput(dentry); + } return err; no_open: res = nfs_lookup(dir, dentry, lookup_flags); - err = PTR_ERR(res); + if (switched) { + d_lookup_done(dentry); + if (!res) + res = dentry; + else + dput(dentry); + } if (IS_ERR(res)) - goto out; - + return PTR_ERR(res); return finish_no_open(file, res); } EXPORT_SYMBOL_GPL(nfs_atomic_open); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 979b3c4dee6a..c7326c2af2c3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) result = wait_for_completion_killable(&dreq->completion); + if (!result) { + result = dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } if (!result) result = dreq->error; - if (!result) - result = dreq->count; out: return (ssize_t) result; @@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) if (dreq->iocb) { long res = (long) dreq->error; - if (!res) + if (dreq->count != 0) { res = (long) dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } dreq->iocb->ki_complete(dreq->iocb, res, 0); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52e7d6869e3b..dda689d7a8a7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque) struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); + inode->i_mode = fattr->mode; nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 223982eb38c9..ff416d0e24bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) call_close |= is_wronly; else if (is_wronly) calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (calldata->arg.fmode == 0) - call_close |= is_rdwr; - if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -5015,12 +5014,11 @@ static int nfs4_do_set_security_label(struct inode *inode, } static int -nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen) +nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { struct nfs4_label ilabel, *olabel = NULL; struct nfs_fattr fattr; struct rpc_cred *cred; - struct inode *inode = d_inode(dentry); int status; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) @@ -6281,11 +6279,11 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags) + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) { - return nfs4_proc_set_acl(d_inode(dentry), buf, buflen); + return nfs4_proc_set_acl(inode, buf, buflen); } static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, @@ -6303,12 +6301,12 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) #ifdef CONFIG_NFS_V4_SECURITY_LABEL static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags) + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) { if (security_ismaclabel(key)) - return nfs4_set_security_label(dentry, buf, buflen); + return nfs4_set_security_label(inode, buf, buflen); return -EOPNOTSUPP; } @@ -7925,8 +7923,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, break; } lo = NFS_I(inode)->layout; - if (lo && nfs4_stateid_match(&lgp->args.stateid, - &lo->plh_stateid)) { + if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) && + nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { LIST_HEAD(head); /* @@ -7937,10 +7935,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); + status = -EAGAIN; + goto out; } else spin_unlock(&inode->i_lock); - status = -EAGAIN; - goto out; } status = nfs4_handle_exception(server, status, exception); @@ -8037,7 +8035,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) .flags = RPC_TASK_ASYNC, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { .timeout = *timeout }; + struct nfs4_exception exception = { + .inode = inode, + .timeout = *timeout, + }; int status = 0; dprintk("--> %s\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5075592df145..834b875900d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -66,7 +66,7 @@ #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid = { - .data = { 0 }, + { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, }; static DEFINE_MUTEX(nfs_clid_init_mutex); @@ -1488,9 +1488,9 @@ restart: } spin_unlock(&state->state_lock); } - nfs4_put_open_state(state); clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + nfs4_put_open_state(state); spin_lock(&sp->so_lock); goto restart; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0c7e0d45a4de..0fbe734cc38c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) + if (list_empty(&lo->plh_segs)) { + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -1290,6 +1292,7 @@ alloc_init_layout_hdr(struct inode *ino, INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); + lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; return lo; } @@ -1297,6 +1300,8 @@ static struct pnfs_layout_hdr * pnfs_find_alloc_layout(struct inode *ino, struct nfs_open_context *ctx, gfp_t gfp_flags) + __releases(&ino->i_lock) + __acquires(&ino->i_lock) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -1565,8 +1570,7 @@ lookup_again: * stateid, or it has been invalidated, then we must use the open * stateid. */ - if (lo->plh_stateid.seqid == 0 || - test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { /* * The first layoutget for the file. Need to serialize per diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 0dfc476da3e1..b38e3c0dc790 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, } /* Helper function for pnfs_generic_commit_pagelist to catch an empty - * page list. This can happen when two commits race. */ + * page list. This can happen when two commits race. + * + * This must be called instead of nfs_init_commit - call one or the other, but + * not both! + */ static bool pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, struct nfs_commit_data *data, @@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, if (list_empty(pages)) { if (atomic_dec_and_test(&cinfo->mds->rpcs_out)) wake_up_atomic_t(&cinfo->mds->rpcs_out); - nfs_commitdata_release(data); + /* don't call nfs_commitdata_release - it tries to put + * the open_context which is not acquired until nfs_init_commit + * which has not been called on @data */ + WARN_ON_ONCE(data->context); + nfs_commit_free(data); return true; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6776d7a7839e..572e5b3b06f1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page) nfs_list_remove_request(new); nfs_readpage_release(new); error = desc->pgio->pg_error; - goto out_unlock; + goto out; } return 0; out_error: error = PTR_ERR(new); -out_unlock: unlock_page(page); +out: return error; } diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e55b5242614d..31f3df193bdb 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -290,7 +290,7 @@ out_free_buf: return error; } -#define NFSD_MDS_PR_KEY 0x0100000000000000 +#define NFSD_MDS_PR_KEY 0x0100000000000000ULL /* * We use the client ID as a unique key for the reservations. diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1580ea6fd64d..d08cd88155c7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); if (error) - goto out_drop_write; + goto out_drop_lock; + + fh_unlock(fh); fh_drop_write(fh); @@ -131,7 +130,8 @@ out: posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 01df4cd7c753..0c890347cde3 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6adabd6049b7..71292a0d6f09 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode = d_inode(dentry); - if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) - return nfserr_attrnotsupp; - if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; @@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_error < 0) goto out_nfserr; - host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + fh_lock(fhp); + + host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); if (host_error < 0) - goto out_release; + goto out_drop_lock; if (S_ISDIR(inode->i_mode)) { - host_error = inode->i_op->set_acl(inode, dpacl, - ACL_TYPE_DEFAULT); + host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); } -out_release: +out_drop_lock: + fh_unlock(fhp); + posix_acl_release(pacl); posix_acl_release(dpacl); out_nfserr: diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7389cb1d7409..04c68d900324 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } -static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) -{ - struct rpc_xprt *xprt; - - if (args->protocol != XPRT_TRANSPORT_BC_TCP) - return rpc_create(args); - - xprt = args->bc_xprt->xpt_bc_xprt; - if (xprt) { - xprt_get(xprt); - return rpc_create_xprt(args, xprt); - } - - return rpc_create(args); -} - static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { int maxtime = max_cb_time(clp->net); @@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = create_backchannel_client(&args); + client = rpc_create(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f5f82e145018..70d0b9b33031 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3480,12 +3480,17 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } static struct nfs4_ol_stateid * -init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, - struct nfsd4_open *open) +init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + struct nfs4_ol_stateid *stp; + + stp = open->op_stp; + /* We are moving these outside of the spinlocks to avoid the warnings */ + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3493,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; + + open->op_stp = NULL; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3502,14 +3509,19 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); - return retstp; + if (retstp) { + mutex_lock(&retstp->st_mutex); + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + stp = retstp; + } + return stp; } /* @@ -4305,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; - struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4335,32 +4346,28 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } } else { - stp = open->op_stp; - open->op_stp = NULL; - swapstp = init_open_stateid(stp, fp, open); - if (swapstp) { - nfs4_put_stid(&stp->st_stid); - stp = swapstp; - down_read(&stp->st_rwsem); + /* stp is returned locked. */ + stp = init_open_stateid(fp, open); + /* See if we lost the race to some other thread */ + if (stp->st_access_bmap != 0) { status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } goto upgrade_out; } - down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); release_open_stateid(stp); goto out; } @@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } upgrade_out: nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; - down_write(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); return status; } @@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); @@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; - up_write(&open_stp->st_rwsem); + mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); if (status == nfs_ok) - down_write(&lock_stp->st_rwsem); + mutex_lock(&lock_stp->st_mutex); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5704,7 +5711,7 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - up_write(&lock_stp->st_rwsem); + mutex_unlock(&lock_stp->st_mutex); /* * If this is a new, never-before-used stateid, and we are @@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 986e51e5ceac..64053eadeb81 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -535,7 +535,7 @@ struct nfs4_ol_stateid { unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; - struct rw_semaphore st_rwsem; + struct mutex st_mutex; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 809bd2de7ad0..e9fd241b9a0a 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index e27e6527912b..4342c7ee7d20 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -1,7 +1,5 @@ ccflags-y := -Ifs/ocfs2 -ccflags-y += -DCATCH_BH_JBD_RACES - obj-$(CONFIG_OCFS2_FS) += \ ocfs2.o \ ocfs2_stackglue.o diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index fe50ded1b4ce..498641eed2db 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, lock_buffer(bh); if (buffer_jbd(bh)) { +#ifdef CATCH_BH_JBD_RACES mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); +#else + unlock_buffer(bh); + continue; +#endif } clear_buffer_uptodate(bh); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a8d15beee5cb..6aaf3e351391 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -272,10 +272,21 @@ struct o2hb_region { struct delayed_work hr_write_timeout_work; unsigned long hr_last_timeout_start; + /* negotiate timer, used to negotiate extending hb timeout. */ + struct delayed_work hr_nego_timeout_work; + unsigned long hr_nego_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + /* Used during o2hb_check_slot to hold a copy of the block * being checked because we temporarily have to zero out the * crc field. */ struct o2hb_disk_heartbeat_block *hr_tmp_block; + + /* Message key for negotiate timeout message. */ + unsigned int hr_key; + struct list_head hr_handler_list; + + /* last hb status, 0 for success, other value for error. */ + int hr_last_hb_status; }; struct o2hb_bio_wait_ctxt { @@ -284,6 +295,17 @@ struct o2hb_bio_wait_ctxt { int wc_error; }; +#define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2) + +enum { + O2HB_NEGO_TIMEOUT_MSG = 1, + O2HB_NEGO_APPROVE_MSG = 2, +}; + +struct o2hb_nego_msg { + u8 node_num; +}; + static void o2hb_write_timeout(struct work_struct *work) { int failed, quorum; @@ -319,7 +341,7 @@ static void o2hb_write_timeout(struct work_struct *work) o2quo_disk_timeout(); } -static void o2hb_arm_write_timeout(struct o2hb_region *reg) +static void o2hb_arm_timeout(struct o2hb_region *reg) { /* Arm writeout only after thread reaches steady state */ if (atomic_read(®->hr_steady_iterations) != 0) @@ -334,14 +356,132 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) spin_unlock(&o2hb_live_lock); } cancel_delayed_work(®->hr_write_timeout_work); - reg->hr_last_timeout_start = jiffies; schedule_delayed_work(®->hr_write_timeout_work, msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS)); + + cancel_delayed_work(®->hr_nego_timeout_work); + /* negotiate timeout must be less than write timeout. */ + schedule_delayed_work(®->hr_nego_timeout_work, + msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS)); + memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap)); } -static void o2hb_disarm_write_timeout(struct o2hb_region *reg) +static void o2hb_disarm_timeout(struct o2hb_region *reg) { cancel_delayed_work_sync(®->hr_write_timeout_work); + cancel_delayed_work_sync(®->hr_nego_timeout_work); +} + +static int o2hb_send_nego_msg(int key, int type, u8 target) +{ + struct o2hb_nego_msg msg; + int status, ret; + + msg.node_num = o2nm_this_node(); +again: + ret = o2net_send_message(type, key, &msg, sizeof(msg), + target, &status); + + if (ret == -EAGAIN || ret == -ENOMEM) { + msleep(100); + goto again; + } + + return ret; +} + +static void o2hb_nego_timeout(struct work_struct *work) +{ + unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + int master_node, i, ret; + struct o2hb_region *reg; + + reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work); + /* don't negotiate timeout if last hb failed since it is very + * possible io failed. Should let write timeout fence self. + */ + if (reg->hr_last_hb_status) + return; + + o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); + /* lowest node as master node to make negotiate decision. */ + master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); + + if (master_node == o2nm_this_node()) { + if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { + printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n", + o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, + config_item_name(®->hr_item), reg->hr_dev_name); + set_bit(master_node, reg->hr_nego_node_bitmap); + } + if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, + sizeof(reg->hr_nego_node_bitmap))) { + /* check negotiate bitmap every second to do timeout + * approve decision. + */ + schedule_delayed_work(®->hr_nego_timeout_work, + msecs_to_jiffies(1000)); + + return; + } + + printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n", + config_item_name(®->hr_item), reg->hr_dev_name); + /* approve negotiate timeout request. */ + o2hb_arm_timeout(reg); + + i = -1; + while ((i = find_next_bit(live_node_bitmap, + O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { + if (i == master_node) + continue; + + mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i); + ret = o2hb_send_nego_msg(reg->hr_key, + O2HB_NEGO_APPROVE_MSG, i); + if (ret) + mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n", + i, ret); + } + } else { + /* negotiate timeout with master node. */ + printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n", + o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(®->hr_item), + reg->hr_dev_name, master_node); + ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, + master_node); + if (ret) + mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n", + master_node, ret); + } +} + +static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, + void **ret_data) +{ + struct o2hb_region *reg = data; + struct o2hb_nego_msg *nego_msg; + + nego_msg = (struct o2hb_nego_msg *)msg->buf; + printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n", + nego_msg->node_num, config_item_name(®->hr_item), reg->hr_dev_name); + if (nego_msg->node_num < O2NM_MAX_NODES) + set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); + else + mlog(ML_ERROR, "got nego timeout message from bad node.\n"); + + return 0; +} + +static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, + void **ret_data) +{ + struct o2hb_region *reg = data; + + printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n", + config_item_name(®->hr_item), reg->hr_dev_name); + o2hb_arm_timeout(reg); + return 0; } static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) @@ -1032,7 +1172,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) /* Skip disarming the timeout if own slot has stale/bad data */ if (own_slot_ok) { o2hb_set_quorum_device(reg); - o2hb_arm_write_timeout(reg); + o2hb_arm_timeout(reg); + reg->hr_last_timeout_start = jiffies; } bail: @@ -1096,6 +1237,7 @@ static int o2hb_thread(void *data) before_hb = ktime_get_real(); ret = o2hb_do_disk_heartbeat(reg); + reg->hr_last_hb_status = ret; after_hb = ktime_get_real(); @@ -1114,7 +1256,7 @@ static int o2hb_thread(void *data) } } - o2hb_disarm_write_timeout(reg); + o2hb_disarm_timeout(reg); /* unclean stop is only used in very bad situation */ for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++) @@ -1451,6 +1593,7 @@ static void o2hb_region_release(struct config_item *item) list_del(®->hr_all_item); spin_unlock(&o2hb_live_lock); + o2net_unregister_handler_list(®->hr_handler_list); kfree(reg); } @@ -1762,6 +1905,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, } INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); + INIT_DELAYED_WORK(®->hr_nego_timeout_work, o2hb_nego_timeout); /* * A node is considered live after it has beat LIVE_THRESHOLD @@ -1995,13 +2139,37 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g config_item_init_type_name(®->hr_item, name, &o2hb_region_type); + /* this is the same way to generate msg key as dlm, for local heartbeat, + * name is also the same, so make initial crc value different to avoid + * message key conflict. + */ + reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS, + name, strlen(name)); + INIT_LIST_HEAD(®->hr_handler_list); + ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key, + sizeof(struct o2hb_nego_msg), + o2hb_nego_timeout_handler, + reg, NULL, ®->hr_handler_list); + if (ret) + goto free; + + ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key, + sizeof(struct o2hb_nego_msg), + o2hb_nego_approve_handler, + reg, NULL, ®->hr_handler_list); + if (ret) + goto unregister_handler; + ret = o2hb_debug_region_init(reg, o2hb_debug_dir); if (ret) { config_item_put(®->hr_item); - goto free; + goto unregister_handler; } return ®->hr_item; + +unregister_handler: + o2net_unregister_handler_list(®->hr_handler_list); free: kfree(reg); return ERR_PTR(ret); diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index b95e7df5b76a..94b18369b1cc 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -44,6 +44,9 @@ * version here in tcp_internal.h should not need to be bumped for * filesystem locking changes. * + * New in version 12 + * - Negotiate hb timeout when storage is down. + * * New in version 11 * - Negotiation of filesystem locking in the dlm join. * @@ -75,7 +78,7 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 11ULL +#define O2NET_PROTOCOL_VERSION 12ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ad16995c9e7a..d2053853951e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7254,10 +7254,11 @@ static int ocfs2_xattr_security_get(const struct xattr_handler *handler, } static int ocfs2_xattr_security_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, + return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, size, flags); } @@ -7325,10 +7326,11 @@ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, } static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, + return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, size, flags); } @@ -7354,15 +7356,16 @@ static int ocfs2_xattr_user_get(const struct xattr_handler *handler, } static int ocfs2_xattr_user_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return -EOPNOTSUPP; - return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER, + return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, size, flags); } diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 99c19545752c..5893ddde0e4b 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -448,13 +448,14 @@ out_unlock: } static int orangefs_xattr_set_default(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { - return orangefs_inode_setxattr(dentry->d_inode, + return orangefs_inode_setxattr(inode, ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, name, buffer, @@ -478,13 +479,14 @@ static int orangefs_xattr_get_default(const struct xattr_handler *handler, } static int orangefs_xattr_set_trusted(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { - return orangefs_inode_setxattr(dentry->d_inode, + return orangefs_inode_setxattr(inode, ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, name, buffer, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index cc514da6f3e7..80aa6f1eb336 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -336,7 +336,6 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *upperdir; struct dentry *upperdentry; const struct cred *old_cred; - struct cred *override_cred; char *link = NULL; if (WARN_ON(!workdir)) @@ -357,28 +356,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return PTR_ERR(link); } - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_free_link; - - override_cred->fsuid = stat->uid; - override_cred->fsgid = stat->gid; - /* - * CAP_SYS_ADMIN for copying up extended attributes - * CAP_DAC_OVERRIDE for create - * CAP_FOWNER for chmod, timestamp update - * CAP_FSETID for chmod - * CAP_CHOWN for chown - * CAP_MKNOD for mknod - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - cap_raise(override_cred->cap_effective, CAP_MKNOD); - old_cred = override_creds(override_cred); + old_cred = ovl_override_creds(dentry->d_sb); err = -EIO; if (lock_rename(workdir, upperdir) != NULL) { @@ -401,9 +379,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, out_unlock: unlock_rename(workdir, upperdir); revert_creds(old_cred); - put_cred(override_cred); -out_free_link: if (link) free_page((unsigned long) link); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index b3fc0a35bf62..5c9d2d80ff70 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -407,26 +407,20 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, const struct cred *old_cred; struct cred *override_cred; + old_cred = ovl_override_creds(dentry->d_sb); + err = -ENOMEM; override_cred = prepare_creds(); - if (!override_cred) - goto out_iput; - - /* - * CAP_SYS_ADMIN for setting opaque xattr - * CAP_DAC_OVERRIDE for create in workdir, rename - * CAP_FOWNER for removing whiteout from sticky dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - old_cred = override_creds(override_cred); - - err = ovl_create_over_whiteout(dentry, inode, &stat, link, - hardlink); - + if (override_cred) { + override_cred->fsuid = old_cred->fsuid; + override_cred->fsgid = old_cred->fsgid; + put_cred(override_creds(override_cred)); + put_cred(override_cred); + + err = ovl_create_over_whiteout(dentry, inode, &stat, + link, hardlink); + } revert_creds(old_cred); - put_cred(override_cred); } if (!err) @@ -511,6 +505,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *upper; struct dentry *opaquedir = NULL; int err; + int flags = 0; if (WARN_ON(!workdir)) return -EROFS; @@ -540,46 +535,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) if (err) goto out_dput; - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) goto out_unlock; - upper = ovl_dentry_upper(dentry); - if (!upper) { - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto kill_whiteout; - - err = ovl_do_rename(wdir, whiteout, udir, upper, 0); - dput(upper); - if (err) - goto kill_whiteout; - } else { - int flags = 0; + err = -ESTALE; + if ((opaquedir && upper != opaquedir) || + (!opaquedir && ovl_dentry_upper(dentry) && + upper != ovl_dentry_upper(dentry))) { + goto out_dput_upper; + } - if (opaquedir) - upper = opaquedir; - err = -ESTALE; - if (upper->d_parent != upperdir) - goto kill_whiteout; + whiteout = ovl_whiteout(workdir, dentry); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto out_dput_upper; - if (is_dir) - flags |= RENAME_EXCHANGE; + if (d_is_dir(upper)) + flags = RENAME_EXCHANGE; - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); - if (err) - goto kill_whiteout; + err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, upper); - if (is_dir) - ovl_cleanup(wdir, upper); - } ovl_dentry_version_inc(dentry->d_parent); out_d_drop: d_drop(dentry); dput(whiteout); +out_dput_upper: + dput(upper); out_unlock: unlock_rename(workdir, upperdir); out_dput: @@ -662,32 +650,11 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (OVL_TYPE_PURE_UPPER(type)) { err = ovl_remove_upper(dentry, is_dir); } else { - const struct cred *old_cred; - struct cred *override_cred; - - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_drop_write; - - /* - * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir - * CAP_DAC_OVERRIDE for create in workdir, rename - * CAP_FOWNER for removing whiteout from sticky dir - * CAP_FSETID for chmod of opaque dir - * CAP_CHOWN for chown of opaque dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - old_cred = override_creds(override_cred); + const struct cred *old_cred = ovl_override_creds(dentry->d_sb); err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); - put_cred(override_cred); } out_drop_write: ovl_drop_write(dentry); @@ -725,7 +692,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, bool new_is_dir = false; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; - struct cred *override_cred = NULL; err = -EINVAL; if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) @@ -794,26 +760,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, old_opaque = !OVL_TYPE_PURE_UPPER(old_type); new_opaque = !OVL_TYPE_PURE_UPPER(new_type); - if (old_opaque || new_opaque) { - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_drop_write; - - /* - * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir - * CAP_DAC_OVERRIDE for create in workdir - * CAP_FOWNER for removing whiteout from sticky dir - * CAP_FSETID for chmod of opaque dir - * CAP_CHOWN for chown of opaque dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - old_cred = override_creds(override_cred); - } + if (old_opaque || new_opaque) + old_cred = ovl_override_creds(old->d_sb); if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { opaquedir = ovl_check_empty_and_clear(new); @@ -943,10 +891,8 @@ out_dput_old: out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: - if (old_opaque || new_opaque) { + if (old_opaque || new_opaque) revert_creds(old_cred); - put_cred(override_cred); - } out_drop_write: ovl_drop_write(old); out: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index c7b31a03dc9c..d1cdc60dd68f 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -59,16 +59,40 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; + if (attr->ia_valid & ATTR_SIZE) { + struct inode *realinode = d_inode(ovl_dentry_real(dentry)); + + err = -ETXTBSY; + if (atomic_read(&realinode->i_writecount) < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (!err) { + struct inode *winode = NULL; + upperdentry = ovl_dentry_upper(dentry); + if (attr->ia_valid & ATTR_SIZE) { + winode = d_inode(upperdentry); + err = get_write_access(winode); + if (err) + goto out_drop_write; + } + + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) + attr->ia_valid &= ~ATTR_MODE; + inode_lock(upperdentry->d_inode); err = notify_change(upperdentry, attr, NULL); if (!err) ovl_copyattr(upperdentry->d_inode, dentry->d_inode); inode_unlock(upperdentry->d_inode); + + if (winode) + put_write_access(winode); } +out_drop_write: ovl_drop_write(dentry); out: return err; @@ -121,16 +145,18 @@ int ovl_permission(struct inode *inode, int mask) err = vfs_getattr(&realpath, &stat); if (err) - return err; + goto out_dput; + err = -ESTALE; if ((stat.mode ^ inode->i_mode) & S_IFMT) - return -ESTALE; + goto out_dput; inode->i_mode = stat.mode; inode->i_uid = stat.uid; inode->i_gid = stat.gid; - return generic_permission(inode, mask); + err = generic_permission(inode, mask); + goto out_dput; } /* Careful in RCU walk mode */ @@ -210,8 +236,9 @@ static bool ovl_is_private_xattr(const char *name) return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0; } -int ovl_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int ovl_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { int err; struct dentry *upperdentry; @@ -237,41 +264,27 @@ out: return err; } -static bool ovl_need_xattr_filter(struct dentry *dentry, - enum ovl_path_type type) -{ - if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) - return S_ISDIR(dentry->d_inode->i_mode); - else - return false; -} - ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(realpath.dentry, name, value, size); + return vfs_getxattr(realdentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; int off; - res = vfs_listxattr(realpath.dentry, list, size); + res = vfs_listxattr(realdentry, list, size); if (res <= 0 || size == 0) return res; - if (!ovl_need_xattr_filter(dentry, type)) - return res; - /* filter out private xattrs */ for (off = 0; off < res;) { char *s = list + off; @@ -301,7 +314,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name) goto out; err = -ENODATA; - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) goto out_drop_write; if (!OVL_TYPE_UPPER(type)) { @@ -400,12 +413,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, if (!inode) return NULL; - mode &= S_IFMT; - inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_flags |= S_NOATIME | S_NOCMTIME; + mode &= S_IFMT; switch (mode) { case S_IFDIR: inode->i_private = oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 99ec4b035237..cfbca53590d0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -153,6 +153,7 @@ void ovl_drop_write(struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); bool ovl_is_whiteout(struct dentry *dentry); +const struct cred *ovl_override_creds(struct super_block *sb); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); @@ -171,8 +172,9 @@ int ovl_check_d_type_supported(struct path *realpath); /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); -int ovl_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); +int ovl_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags); ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); @@ -185,6 +187,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; to->i_gid = from->i_gid; + to->i_mode = from->i_mode; } /* dir.c */ diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index da186ee4f846..cf37fc76fc9f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -36,6 +36,7 @@ struct ovl_dir_cache { struct ovl_readdir_data { struct dir_context ctx; + struct dentry *dentry; bool is_lowest; struct rb_root root; struct list_head *list; @@ -206,21 +207,10 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) struct ovl_cache_entry *p; struct dentry *dentry; const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) - return -ENOMEM; - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); + old_cred = ovl_override_creds(rdd->dentry->d_sb); - inode_lock(dir->d_inode); - err = 0; - // XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex); + err = down_write_killable(&dir->d_inode->i_rwsem); if (!err) { while (rdd->first_maybe_whiteout) { p = rdd->first_maybe_whiteout; @@ -234,7 +224,6 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) inode_unlock(dir->d_inode); } revert_creds(old_cred); - put_cred(override_cred); return err; } @@ -290,6 +279,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) struct path realpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, + .dentry = dentry, .list = list, .root = RB_ROOT, .is_lowest = false, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ed53ae0fe868..9a7693d5f8ff 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -42,6 +42,8 @@ struct ovl_fs { long lower_namelen; /* pathnames of lower and upper dirs, for show_options */ struct ovl_config config; + /* creds of process who forced instantiation of super block */ + const struct cred *creator_cred; }; struct ovl_dir_cache; @@ -265,6 +267,13 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } +const struct cred *ovl_override_creds(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return override_creds(ofs->creator_cred); +} + static bool ovl_is_opaquedir(struct dentry *dentry) { int res; @@ -603,6 +612,7 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs->config.lowerdir); kfree(ufs->config.upperdir); kfree(ufs->config.workdir); + put_cred(ufs->creator_cred); kfree(ufs); } @@ -1064,16 +1074,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* * Upper should support d_type, else whiteouts are visible. * Given workdir and upper are on same fs, we can do - * iterate_dir() on workdir. + * iterate_dir() on workdir. This check requires successful + * creation of workdir in previous step. */ - err = ovl_check_d_type_supported(&workpath); - if (err < 0) - goto out_put_workdir; + if (ufs->workdir) { + err = ovl_check_d_type_supported(&workpath); + if (err < 0) + goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; + /* + * We allowed this configuration and don't want to + * break users over kernel upgrade. So warn instead + * of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); } } @@ -1108,10 +1123,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; + ufs->creator_cred = prepare_creds(); + if (!ufs->creator_cred) + goto out_put_lower_mnt; + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) - goto out_put_lower_mnt; + goto out_put_cred; root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe)); if (!root_dentry) @@ -1144,6 +1163,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) out_free_oe: kfree(oe); +out_put_cred: + put_cred(ufs->creator_cred); out_put_lower_mnt: for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 2c60f17e7d92..edc452c2a563 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler, return error; } -static int -posix_acl_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int +set_posix_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct inode *inode = d_backing_inode(dentry); - struct posix_acl *acl = NULL; - int ret; - if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; if (!inode_owner_or_capable(inode)) return -EPERM; + if (acl) { + int ret = posix_acl_valid(acl); + if (ret) + return ret; + } + return inode->i_op->set_acl(inode, acl, type); +} +EXPORT_SYMBOL(set_posix_acl); + +static int +posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + struct posix_acl *acl = NULL; + int ret; + if (value) { acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); - - if (acl) { - ret = posix_acl_valid(acl); - if (ret) - goto out; - } } - - ret = inode->i_op->set_acl(inode, acl, handler->flags); -out: + ret = set_posix_acl(inode, handler->flags, acl); posix_acl_release(acl); return ret; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 55bc7d6c8aac..06702783bf40 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) return ERR_CAST(sb); + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + if (!proc_parse_options(options, ns)) { deactivate_locked_super(sb); return ERR_PTR(-EINVAL); diff --git a/fs/read_write.c b/fs/read_write.c index 933b53a375b4..66215a7b17cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, return do_compat_preadv64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_preadv64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, @@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, return do_compat_pwritev64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_pwritev64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) diff --git a/fs/readdir.c b/fs/readdir.c index 68ef06efe6bc..9d0212c374d6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -35,13 +35,13 @@ int iterate_dir(struct file *file, struct dir_context *ctx) if (res) goto out; - if (shared) + if (shared) { inode_lock_shared(inode); - else - inode_lock(inode); - // res = mutex_lock_killable(&inode->i_mutex); - // if (res) - // goto out; + } else { + res = down_write_killable(&inode->i_rwsem); + if (res) + goto out; + } res = -ENOENT; if (!IS_DEADDIR(inode)) { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b8f2d1e8c645..c72c16c5a60f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts = kstrdup(arg, GFP_KERNEL); + char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif + new_opts = kstrdup(arg, GFP_KERNEL); + if (arg && !new_opts) + return -ENOMEM; + sync_filesystem(s); reiserfs_write_lock(s); @@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok_unlocked: - replace_mount_options(s, new_opts); + if (new_opts) + replace_mount_options(s, new_opts); return 0; out_err_unlock: diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 86aeb9dd805a..e4cbb7719906 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -20,13 +20,14 @@ security_get(const struct xattr_handler *handler, struct dentry *unused, } static int -security_set(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, const void *buffer, size_t size, int flags) +security_set(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - if (IS_PRIVATE(d_inode(dentry))) + if (IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_set(d_inode(dentry), + return reiserfs_xattr_set(inode, xattr_full_name(handler, name), buffer, size, flags); } diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 31837f031f59..f15a5f9e84ce 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -19,13 +19,14 @@ trusted_get(const struct xattr_handler *handler, struct dentry *unused, } static int -trusted_set(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, const void *buffer, size_t size, int flags) +trusted_set(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry))) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_set(d_inode(dentry), + return reiserfs_xattr_set(inode, xattr_full_name(handler, name), buffer, size, flags); } diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index f7c39731684b..dc59df43b2db 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -17,12 +17,13 @@ user_get(const struct xattr_handler *handler, struct dentry *unused, } static int -user_set(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, const void *buffer, size_t size, int flags) +user_set(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - if (!reiserfs_xattrs_user(dentry->d_sb)) + if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_set(d_inode(dentry), + return reiserfs_xattr_set(inode, xattr_full_name(handler, name), buffer, size, flags); } diff --git a/fs/timerfd.c b/fs/timerfd.c index 053818dd6c18..9ae4abb4110b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -390,6 +390,11 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; + if (!capable(CAP_WAKE_ALARM) && + (clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM)) + return -EPERM; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -433,6 +438,11 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; + if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + fdput(f); + return -EPERM; + } + timerfd_setup_cancel(ctx, flags); /* diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 595ca0debe11..69e287e20732 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -260,7 +260,7 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) pr_err("\txattr_names %u\n", ui->xattr_names); pr_err("\tdirty %u\n", ui->dirty); pr_err("\txattr %u\n", ui->xattr); - pr_err("\tbulk_read %u\n", ui->xattr); + pr_err("\tbulk_read %u\n", ui->bulk_read); pr_err("\tsynced_i_size %llu\n", (unsigned long long)ui->synced_i_size); pr_err("\tui_size %llu\n", diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 08316972ff93..7bbf420d1289 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -52,6 +52,7 @@ #include "ubifs.h" #include <linux/mount.h> #include <linux/slab.h> +#include <linux/migrate.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page) return ret; } +#ifdef CONFIG_MIGRATION +static int ubifs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc; + + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + SetPagePrivate(newpage); + } + + migrate_page_copy(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +#endif + static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) { /* @@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidatepage = ubifs_invalidatepage, .set_page_dirty = ubifs_set_page_dirty, +#ifdef CONFIG_MIGRATION + .migratepage = ubifs_migrate_page, +#endif .releasepage = ubifs_releasepage, }; diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 6c277eb6aef9..b5fc27969e9d 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -579,11 +579,10 @@ static int ubifs_xattr_get(const struct xattr_handler *handler, } static int ubifs_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - struct inode *inode = d_inode(dentry); - dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, inode->i_ino, dentry, size); diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 5f861ed287c3..888c364b2fe9 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, map = &UDF_SB(sb)->s_partmaps[partition]; /* map to sparable/physical partition desc */ phyblock = udf_get_pblock(sb, eloc.logicalBlockNum, - map->s_partition_num, ext_offset + offset); + map->s_type_specific.s_metadata.s_phys_partition_ref, + ext_offset + offset); } brelse(epos.bh); @@ -317,14 +318,18 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, mdata = &map->s_type_specific.s_metadata; inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe; - /* We shouldn't mount such media... */ - BUG_ON(!inode); + if (!inode) + return 0xFFFFFFFF; + retblk = udf_try_read_meta(inode, block, partition, offset); if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) { udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n"); if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) { mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, + mdata->s_phys_partition_ref); + if (IS_ERR(mdata->s_mirror_fe)) + mdata->s_mirror_fe = NULL; mdata->s_flags |= MF_MIRROR_FE_LOADED; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 5e2c8c814e1b..4942549e7dc8 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -951,13 +951,13 @@ out2: } struct inode *udf_find_metadata_inode_efe(struct super_block *sb, - u32 meta_file_loc, u32 partition_num) + u32 meta_file_loc, u32 partition_ref) { struct kernel_lb_addr addr; struct inode *metadata_fe; addr.logicalBlockNum = meta_file_loc; - addr.partitionReferenceNum = partition_num; + addr.partitionReferenceNum = partition_ref; metadata_fe = udf_iget_special(sb, &addr); @@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, return metadata_fe; } -static int udf_load_metadata_files(struct super_block *sb, int partition) +static int udf_load_metadata_files(struct super_block *sb, int partition, + int type1_index) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; @@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; + mdata->s_phys_partition_ref = type1_index; /* metadata address */ udf_debug("Metadata file location: block = %d part = %d\n", - mdata->s_meta_file_loc, map->s_partition_num); + mdata->s_meta_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %d part = %d\n", - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); @@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) */ if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { addr.logicalBlockNum = mdata->s_bitmap_file_loc; - addr.partitionReferenceNum = map->s_partition_num; + addr.partitionReferenceNum = mdata->s_phys_partition_ref; udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); @@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) p = (struct partitionDesc *)bh->b_data; partitionNumber = le16_to_cpu(p->partitionNumber); - /* First scan for TYPE1, SPARABLE and METADATA partitions */ + /* First scan for TYPE1 and SPARABLE partitions */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%d == %d)\n", @@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) goto out_bh; if (map->s_partition_type == UDF_METADATA_MAP25) { - ret = udf_load_metadata_files(sb, i); + ret = udf_load_metadata_files(sb, i, type1_idx); if (ret < 0) { udf_err(sb, "error loading MetaData partition map %d\n", i); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 27b5335730c9..c13875d669c0 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -61,6 +61,11 @@ struct udf_meta_data { __u32 s_bitmap_file_loc; __u32 s_alloc_unit_size; __u16 s_align_unit_size; + /* + * Partition Reference Number of the associated physical / sparable + * partition + */ + __u16 s_phys_partition_ref; int s_flags; struct inode *s_metadata_fe; struct inode *s_mirror_fe; diff --git a/fs/xattr.c b/fs/xattr.c index fc81e771488a..4beafc43daa5 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -100,7 +100,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, if (issec) inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { - error = inode->i_op->setxattr(dentry, name, value, size, flags); + error = inode->i_op->setxattr(dentry, inode, name, value, size, flags); if (!error) { fsnotify_xattr(dentry); security_inode_post_setxattr(dentry, name, value, @@ -745,7 +745,8 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) * Find the handler for the prefix and dispatch its set() operation. */ int -generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) +generic_setxattr(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) { const struct xattr_handler *handler; @@ -754,7 +755,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->set(handler, dentry, name, value, size, flags); + return handler->set(handler, dentry, inode, name, value, size, flags); } /* @@ -769,7 +770,8 @@ generic_removexattr(struct dentry *dentry, const char *name) handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); + return handler->set(handler, dentry, d_inode(dentry), name, NULL, + 0, XATTR_REPLACE); } EXPORT_SYMBOL(generic_getxattr); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index dbca7375deef..63a6ff2cfc68 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1575,6 +1575,12 @@ xfs_ioc_swapext( goto out_put_tmp_file; } + if (f.file->f_op != &xfs_file_operations || + tmp.file->f_op != &xfs_file_operations) { + error = -EINVAL; + goto out_put_tmp_file; + } + ip = XFS_I(file_inode(f.file)); tip = XFS_I(file_inode(tmp.file)); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ec58ff094b1d..ea62245fee26 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -74,11 +74,12 @@ xfs_forget_acl( } static int -xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, const void *value, size_t size, int flags) +xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, const void *value, + size_t size, int flags) { int xflags = handler->flags; - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + struct xfs_inode *ip = XFS_I(inode); int error; /* Convert Linux syscall to XFS internal ATTR flags */ @@ -92,7 +93,7 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, error = xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); if (!error) - xfs_forget_acl(d_inode(dentry), name, xflags); + xfs_forget_acl(inode, name, xflags); return error; } |