summaryrefslogtreecommitdiffstats
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r--fs/inode.c128
1 files changed, 77 insertions, 51 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ae7b67e48661..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);
/*
- * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
+ * iprune_sem provides exclusion between the kswapd or try_to_free_pages
* icache shrinking path, and the umount path. Without this exclusion,
* by the time prune_icache calls iput for the inode whose pages it has
* been invalidating, or by the time it calls clear_inode & destroy_inode
* from its final dispose_list, the struct super_block they refer to
* (for inode->i_sb->s_op) may already have been freed and reused.
+ *
+ * We make this an rwsem because the fastpath is icache shrinking. In
+ * some cases a filesystem may be doing a significant amount of work in
+ * its inode reclaim code, so this should improve parallelism.
*/
-static DEFINE_MUTEX(iprune_mutex);
+static DECLARE_RWSEM(iprune_sem);
/*
* Statistics gathering..
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode)
int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct address_space_operations empty_aops;
- static struct inode_operations empty_iops;
+ static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
struct address_space *const mapping = &inode->i_data;
@@ -182,9 +187,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
if (sb->s_bdev) {
struct backing_dev_info *bdi;
- bdi = sb->s_bdev->bd_inode_backing_dev_info;
- if (!bdi)
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+ bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
mapping->backing_dev_info = bdi;
}
inode->i_private = NULL;
@@ -383,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
/*
* We can reschedule here without worrying about the list's
* consistency because the per-sb list of inodes must not
- * change during umount anymore, and because iprune_mutex keeps
+ * change during umount anymore, and because iprune_sem keeps
* shrink_icache_memory() away.
*/
cond_resched_lock(&inode_lock);
@@ -422,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
int busy;
LIST_HEAD(throw_away);
- mutex_lock(&iprune_mutex);
+ down_write(&iprune_sem);
spin_lock(&inode_lock);
inotify_unmount_inodes(&sb->s_inodes);
fsnotify_unmount_inodes(&sb->s_inodes);
@@ -430,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&throw_away);
- mutex_unlock(&iprune_mutex);
+ up_write(&iprune_sem);
return busy;
}
@@ -469,7 +472,7 @@ static void prune_icache(int nr_to_scan)
int nr_scanned;
unsigned long reap = 0;
- mutex_lock(&iprune_mutex);
+ down_read(&iprune_sem);
spin_lock(&inode_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -511,7 +514,7 @@ static void prune_icache(int nr_to_scan)
spin_unlock(&inode_lock);
dispose_list(&freeable);
- mutex_unlock(&iprune_mutex);
+ up_read(&iprune_sem);
}
/*
@@ -697,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
}
#endif
/*
- * This is special! We do not need the spinlock
- * when clearing I_LOCK, because we're guaranteed
- * that nobody else tries to do anything about the
- * state of the inode when it is locked, as we
- * just created it (so there can be no old holders
- * that haven't tested I_LOCK).
+ * This is special! We do not need the spinlock when clearing I_LOCK,
+ * because we're guaranteed that nobody else tries to do anything about
+ * the state of the inode when it is locked, as we just created it (so
+ * there can be no old holders that haven't tested I_LOCK).
+ * However we must emit the memory barrier so that other CPUs reliably
+ * see the clearing of I_LOCK after the other inode initialisation has
+ * completed.
*/
+ smp_mb();
WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
inode->i_state &= ~(I_LOCK|I_NEW);
wake_up_inode(inode);
@@ -1236,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
}
EXPORT_SYMBOL(generic_delete_inode);
-static void generic_forget_inode(struct inode *inode)
+/**
+ * generic_detach_inode - remove inode from inode lists
+ * @inode: inode to remove
+ *
+ * Remove inode from inode lists, write it if it's dirty. This is just an
+ * internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ * Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1246,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
- return;
+ return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
@@ -1264,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+ if (!generic_detach_inode(inode))
+ return;
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
@@ -1394,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
- if (mnt_want_write(mnt))
- return;
if (inode->i_flags & S_NOATIME)
- goto out;
+ return;
if (IS_NOATIME(inode))
- goto out;
+ return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
if (mnt->mnt_flags & MNT_NOATIME)
- goto out;
+ return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- goto out;
+ return;
if (timespec_equal(&inode->i_atime, &now))
- goto out;
+ return;
+
+ if (mnt_want_write(mnt))
+ return;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
-out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
@@ -1439,34 +1461,37 @@ void file_update_time(struct file *file)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
- int sync_it = 0;
- int err;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+ /* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return;
- err = mnt_want_write_file(file);
- if (err)
- return;
-
now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now)) {
- inode->i_mtime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
- if (!timespec_equal(&inode->i_ctime, &now)) {
- inode->i_ctime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
- if (IS_I_VERSION(inode)) {
- inode_inc_iversion(inode);
- sync_it = 1;
- }
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return;
- if (sync_it)
- mark_inode_dirty_sync(inode);
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return;
+
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
@@ -1594,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
- printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
- mode);
+ printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+ " inode %s:%lu\n", mode, inode->i_sb->s_id,
+ inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);