summaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
authorTheodore Ts'o2009-05-01 14:50:38 +0200
committerTheodore Ts'o2009-05-01 14:50:38 +0200
commit8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 (patch)
tree38fd56a82049f50b4d774af47b9d39f116071755 /fs/ext4/mballoc.c
parentext4: Use separate super_operations structure for no_journal filesystems (diff)
downloadkernel-qcow2-linux-8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5.tar.gz
kernel-qcow2-linux-8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5.tar.xz
kernel-qcow2-linux-8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5.zip
ext4: Avoid races caused by on-line resizing and SMP memory reordering
Ext4's on-line resizing adds a new block group and then, only at the last step adjusts s_groups_count. However, it's possible on SMP systems that another CPU could see the updated the s_group_count and not see the newly initialized data structures for the just-added block group. For this reason, it's important to insert a SMP read barrier after reading s_groups_count and before reading any (for example) the new block group descriptors allowed by the increased value of s_groups_count. Unfortunately, we rather blatently violate this locking protocol documented in fs/ext4/resize.c. Fortunately, (1) on-line resizes happen relatively rarely, and (2) it seems rare that the filesystem code will immediately try to use just-added block group before any memory ordering issues resolve themselves. So apparently problems here are relatively hard to hit, since ext3 has been vulnerable to the same issue for years with no one apparently complaining. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c45
1 files changed, 24 insertions, 21 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..c3af9e6b6668 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -739,6 +739,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
static int ext4_mb_init_cache(struct page *page, char *incore)
{
+ ext4_group_t ngroups;
int blocksize;
int blocks_per_page;
int groups_per_page;
@@ -757,6 +758,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
inode = page->mapping->host;
sb = inode->i_sb;
+ ngroups = ext4_get_groups_count(sb);
blocksize = 1 << inode->i_blkbits;
blocks_per_page = PAGE_CACHE_SIZE / blocksize;
@@ -780,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
for (i = 0; i < groups_per_page; i++) {
struct ext4_group_desc *desc;
- if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+ if (first_group + i >= ngroups)
break;
err = -EIO;
@@ -852,7 +854,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
struct ext4_group_info *grinfo;
group = (first_block + i) >> 1;
- if (group >= EXT4_SB(sb)->s_groups_count)
+ if (group >= ngroups)
break;
/*
@@ -1788,6 +1790,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
int block, pnum;
int blocks_per_page;
int groups_per_page;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t first_group;
struct ext4_group_info *grp;
@@ -1807,7 +1810,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
/* read all groups the page covers into the cache */
for (i = 0; i < groups_per_page; i++) {
- if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+ if ((first_group + i) >= ngroups)
break;
grp = ext4_get_group_info(sb, first_group + i);
/* take all groups write allocation
@@ -1945,8 +1948,7 @@ err:
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
- ext4_group_t group;
- ext4_group_t i;
+ ext4_group_t ngroups, group, i;
int cr;
int err = 0;
int bsbits;
@@ -1957,6 +1959,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
sb = ac->ac_sb;
sbi = EXT4_SB(sb);
+ ngroups = ext4_get_groups_count(sb);
BUG_ON(ac->ac_status == AC_STATUS_FOUND);
/* first, try the goal */
@@ -2017,11 +2020,11 @@ repeat:
*/
group = ac->ac_g_ex.fe_group;
- for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+ for (i = 0; i < ngroups; group++, i++) {
struct ext4_group_info *grp;
struct ext4_group_desc *desc;
- if (group == EXT4_SB(sb)->s_groups_count)
+ if (group == ngroups)
group = 0;
/* quick check to skip empty groups */
@@ -2315,12 +2318,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
struct super_block *sb = seq->private;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
- if (*pos < 0 || *pos >= sbi->s_groups_count)
+ if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
return NULL;
-
group = *pos + 1;
return (void *) ((unsigned long) group);
}
@@ -2328,11 +2329,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct super_block *sb = seq->private;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
++*pos;
- if (*pos < 0 || *pos >= sbi->s_groups_count)
+ if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
return NULL;
group = *pos + 1;
return (void *) ((unsigned long) group);
@@ -2587,6 +2587,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
static int ext4_mb_init_backend(struct super_block *sb)
{
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
int metalen;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2599,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
struct ext4_group_desc *desc;
/* This is the number of blocks used by GDT */
- num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+ num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
/*
@@ -2644,7 +2645,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
for (i = 0; i < num_meta_group_infos; i++) {
if ((i + 1) == num_meta_group_infos)
metalen = sizeof(*meta_group_info) *
- (sbi->s_groups_count -
+ (ngroups -
(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
meta_group_info = kmalloc(metalen, GFP_KERNEL);
if (meta_group_info == NULL) {
@@ -2655,7 +2656,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
sbi->s_group_info[i] = meta_group_info;
}
- for (i = 0; i < sbi->s_groups_count; i++) {
+ for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
if (desc == NULL) {
printk(KERN_ERR
@@ -2781,13 +2782,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
int ext4_mb_release(struct super_block *sb)
{
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
int num_meta_group_infos;
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (sbi->s_group_info) {
- for (i = 0; i < sbi->s_groups_count; i++) {
+ for (i = 0; i < ngroups; i++) {
grinfo = ext4_get_group_info(sb, i);
#ifdef DOUBLE_CHECK
kfree(grinfo->bb_bitmap);
@@ -2797,7 +2799,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_unlock_group(sb, i);
kfree(grinfo);
}
- num_meta_group_infos = (sbi->s_groups_count +
+ num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
for (i = 0; i < num_meta_group_infos; i++)
@@ -4121,7 +4123,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
- ext4_group_t i;
+ ext4_group_t ngroups, i;
printk(KERN_ERR "EXT4-fs: Can't allocate:"
" Allocation context details:\n");
@@ -4145,7 +4147,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
ac->ac_found);
printk(KERN_ERR "EXT4-fs: groups: \n");
- for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+ ngroups = ext4_get_groups_count(sb);
+ for (i = 0; i < ngroups; i++) {
struct ext4_group_info *grp = ext4_get_group_info(sb, i);
struct ext4_prealloc_space *pa;
ext4_grpblk_t start;
@@ -4469,13 +4472,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
{
- ext4_group_t i;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int ret;
int freed = 0;
trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
sb->s_id, needed);
- for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+ for (i = 0; i < ngroups && needed > 0; i++) {
ret = ext4_mb_discard_group_preallocations(sb, i, needed);
freed += ret;
needed -= ret;