summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/aio.c 42
-rw-r--r-- fs/btrfs/ctree.h 9
-rw-r--r-- fs/btrfs/extent-tree.c 45
-rw-r--r-- fs/btrfs/volumes.c 8
-rw-r--r-- fs/buffer.c 23
-rw-r--r-- fs/ext4/extents.c 6
-rw-r--r-- fs/ext4/ialloc.c 4
-rw-r--r-- fs/ext4/mballoc.c 13
-rw-r--r-- fs/nfsd/nfs4xdr.c 1
9 files changed, 117 insertions, 34 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8fa77e233944..76da12537956 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
req->private = NULL;
req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);
- req->ki_eventfd = ERR_PTR(-EINVAL);
+ req->ki_eventfd = NULL;
/* Check if the completion queue has enough free space to
* accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
{
assert_spin_locked(&ctx->ctx_lock);
- if (!IS_ERR(req->ki_eventfd))
- fput(req->ki_eventfd);
if (req->ki_dtor)
req->ki_dtor(req);
if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
list_del(&req->ki_list);
spin_unlock_irq(&fput_lock);
- /* Complete the fput */
- __fput(req->ki_filp);
+ /* Complete the fput(s) */
+ if (req->ki_filp != NULL)
+ __fput(req->ki_filp);
+ if (req->ki_eventfd != NULL)
+ __fput(req->ki_eventfd);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
*/
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
{
+ int schedule_putreq = 0;
+
dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
req, atomic_long_read(&req->ki_filp->f_count));
assert_spin_locked(&ctx->ctx_lock);
- req->ki_users --;
+ req->ki_users--;
BUG_ON(req->ki_users < 0);
if (likely(req->ki_users))
return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
req->ki_cancel = NULL;
req->ki_retry = NULL;
- /* Must be done under the lock to serialise against cancellation.
- * Call this aio_fput as it duplicates fput via the fput_work.
+ /*
+ * Try to optimize the aio and eventfd file* puts by not scheduling
+ * work unless it is __fput() time. In the normal case we are not
+ * holding the last reference to the file*, so this function runs
+ * without any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_filp = NULL;
+ if (req->ki_eventfd != NULL) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_eventfd = NULL;
+ }
+ if (unlikely(schedule_putreq)) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -571,7 +587,7 @@ int aio_put_req(struct kiocb *req)
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct mm_struct *mm = current->mm;
- struct kioctx *ctx = NULL;
+ struct kioctx *ctx, *ret = NULL;
struct hlist_node *n;
rcu_read_lock();
@@ -579,12 +595,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
if (ctx->user_id == ctx_id && !ctx->dead) {
get_ioctx(ctx);
+ ret = ctx;
break;
}
}
rcu_read_unlock();
- return ctx;
+ return ret;
}
/*
@@ -1009,7 +1026,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (!IS_ERR(iocb->ki_eventfd))
+ if (iocb->ki_eventfd != NULL)
eventfd_signal(iocb->ki_eventfd, 1);
put_rq:
@@ -1608,6 +1625,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
+ req->ki_eventfd = NULL;
goto out_put_req;
}
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82491ba8fa40..5e1d4e30e9d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -784,7 +784,14 @@ struct btrfs_fs_info {
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * the space_info list is almost entirely read only. It only changes
+ * when we add a new raid type to the FS, and that happens
+ * very rarely. RCU is used to protect it.
+ */
struct list_head space_info;
+
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
@@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9abf81f71c46..fefe83ad2059 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
@@ -330,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
- list_for_each_entry(found, head, list) {
- if (found->flags == flags)
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags == flags) {
+ rcu_read_unlock();
return found;
+ }
}
+ rcu_read_unlock();
return NULL;
}
+/*
+ * after adding space to the filesystem, clear the full flag on every
+ * space_info on info->space_info; the walk runs under rcu_read_lock()
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
+}
+
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
@@ -1903,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- list_add(&found->list, &info->space_info);
INIT_LIST_HEAD(&found->block_groups);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
@@ -1917,6 +1937,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->full = 0;
found->force_alloc = 0;
*space_info = found;
+ list_add_rcu(&found->list, &info->space_info);
return 0;
}
@@ -6320,6 +6341,7 @@ out:
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
struct rb_node *n;
spin_lock(&info->block_group_cache_lock);
@@ -6341,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_lock(&info->block_group_cache_lock);
}
spin_unlock(&info->block_group_cache_lock);
+
+ /* now that all the block groups are freed, go through and
+ * free all the space_info structs. This is only called during
+ * the final stages of unmount, and so we know nobody is
+ * using them. We call synchronize_rcu() once before we start,
+ * just to be on the safe side.
+ */
+ synchronize_rcu();
+
+ while(!list_empty(&info->space_info)) {
+ space_info = list_entry(info->space_info.next,
+ struct btrfs_space_info,
+ list);
+
+ list_del(&space_info->list);
+ kfree(space_info);
+ }
return 0;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1316139bf9e8..dd06e18e5aac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1374,6 +1374,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_add_device(trans, root, device);
}
+ /*
+ * we've got more storage, clear any full flags on the space
+ * infos
+ */
+ btrfs_clear_space_info_full(root->fs_info);
+
unlock_chunks(root);
btrfs_commit_transaction(trans, root);
@@ -1459,6 +1465,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
device->fs_devices->total_rw_bytes += diff;
device->total_bytes = new_size;
+ btrfs_clear_space_info_full(device->dev_root->fs_info);
+
return btrfs_update_device(trans, device);
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f697419ed8e..891e1c78e4f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -760,15 +760,9 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*/
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- if (TestSetPageDirty(page))
- return 0;
-
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -785,8 +779,6 @@ static int __set_page_dirty(struct page *page,
}
spin_unlock_irq(&mapping->tree_lock);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
- return 1;
}
/*
@@ -816,6 +808,7 @@ static int __set_page_dirty(struct page *page,
*/
int __set_page_dirty_buffers(struct page *page)
{
+ int newly_dirty;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
@@ -831,9 +824,12 @@ int __set_page_dirty_buffers(struct page *page)
bh = bh->b_this_page;
} while (bh != head);
}
+ newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
- return __set_page_dirty(page, mapping, 1);
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 1);
+ return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1262,8 +1258,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
return;
}
- if (!test_set_buffer_dirty(bh))
- __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+ if (!test_set_buffer_dirty(bh)) {
+ struct page *page = bh->b_page;
+ if (!TestSetPageDirty(page))
+ __set_page_dirty(page, page_mapping(page), 0);
+ }
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e2eab196875f..e0aa4fe4f596 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1122,7 +1122,8 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
ext4_fsblk_t block;
- int depth, ee_len;
+ int depth; /* Note, NOT eh_depth; depth from top of tree */
+ int ee_len;
BUG_ON(path == NULL);
depth = path->p_depth;
@@ -1179,7 +1180,8 @@ got_index:
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
- if (ext4_ext_check_header(inode, eh, depth)) {
+ /* subtract from p_depth to get proper eh_depth */
+ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 627f8c3337a3..2d2b3585ee91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -698,6 +698,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct inode *ret;
ext4_group_t i;
int free = 0;
+ static int once = 1;
ext4_group_t flex_group;
/* Cannot create files in a deleted directory */
@@ -719,7 +720,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group);
- if (ret2 == 0 && printk_ratelimit())
+ /* Report the flex fallback once; braces keep the printk under the test. */
+ if (ret2 == 0 && once) {
+ once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
"failed, fallback succeeded dir %lu\n",
dir->i_ino);
+ }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4415beeb0b62..9f61e62f435f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1447,7 +1447,7 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
struct ext4_free_extent *gex = &ac->ac_g_ex;
BUG_ON(ex->fe_len <= 0);
- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
@@ -3292,7 +3292,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -3589,6 +3589,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
struct super_block *sb, struct ext4_prealloc_space *pa)
{
ext4_group_t grp;
+ ext4_fsblk_t grp_blk;
if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
return;
@@ -3603,8 +3604,12 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
pa->pa_deleted = 1;
spin_unlock(&pa->pa_lock);
- /* -1 is to protect from crossing allocation group */
- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
+ grp_blk = pa->pa_pstart;
+ /* If linear, pa_pstart may be in the next group when pa is used up */
+ if (pa->pa_linear)
+ grp_blk--;
+
+ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
/*
* possible race:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f65953be39c0..9250067943d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2596,6 +2596,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
[OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+ [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
[OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,