From 16432985920f3c45af82da214e2498f3e2f9066b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 10:23:21 -0400 Subject: Btrfs: Add O_DIRECT read and write (writes == buffered + cache flush) This adds basic O_DIRECT read and write support. In the write case, we just do a normal buffered write followed by a cache flush. O_DIRECT + O_SYNC are required to trigger metadata syncs. In the read case, there is a basic btrfs_get_block call for use by the generic O_DIRECT code. This does honor multi-volume mapping rules but it skips all checksumming. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 ++++++++-- fs/btrfs/inode.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5fa450452f9b..5b9386452218 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -796,8 +796,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; - if (file->f_flags & O_DIRECT) - return -EINVAL; pos = *ppos; start_pos = pos; @@ -909,6 +907,15 @@ out_nolock: start_pos, num_written); if (err < 0) num_written = err; + } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { + do_sync_mapping_range(inode->i_mapping, start_pos, + start_pos + num_written - 1, + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); + + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? 
num_written : err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48f1d1b96450..32e778fb014e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2265,6 +2265,70 @@ out: return em; } +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ /* btrfs_get_block: block-mapping callback that btrfs_direct_IO() passes to blockdev_direct_IO(). Converts iblock to a byte offset (iblock << i_blkbits), fetches the extent map at that offset with btrfs_get_extent(), and for any extent that is not inline, a hole, or delalloc fills bh_result with the block number, size and bdev derived from stripes[0] of btrfs_map_block()'s result, then marks it mapped. Returns 0 with bh_result left unmapped for holes, delalloc extents, a NULL or error extent map, or an extent that does not contain the offset; returns -EINVAL for inline extents. The create argument is never read. */ + struct extent_map *em; + u64 start = (u64)iblock << inode->i_blkbits; /* byte offset in the file that iblock refers to */ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 len; + u64 logical; + u64 map_length; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); + + if (!em || IS_ERR(em)) + goto out; /* NOTE(review): on the IS_ERR(em) path ret is still 0 and the error pointer is handed to free_extent_map(em) at the out label -- confirm free_extent_map() tolerates ERR_PTR values and whether PTR_ERR(em) should be returned instead */ + + if (em->start > start || em->start + em->len <= start) + goto out; /* returned extent does not contain start: exit with ret == 0 and the buffer unmapped */ + + if (em->block_start == EXTENT_MAP_INLINE) { + ret = -EINVAL; + goto out; + } + + if (em->block_start == EXTENT_MAP_HOLE || + em->block_start == EXTENT_MAP_DELALLOC) { + goto out; /* nothing to map: return 0 without calling set_buffer_mapped() */ + } + + len = em->start + em->len - start; /* bytes from start to the end of this extent */ + len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); /* cap by INT_LIMIT of b_size's type before it is stored in b_size below */ + + logical = start - em->start; /* offset of start inside the extent */ + logical = em->block_start + logical; + + map_length = len; + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + logical, &map_length, &multi, 0); + BUG_ON(ret); /* NOTE(review): any non-zero return from btrfs_map_block() hits BUG_ON instead of being returned to the caller -- confirm this is intended */ + bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; /* only stripes[0] of the mapping is consulted */ + bh_result->b_size = min(map_length, len); + bh_result->b_bdev = multi->stripes[0].dev->bdev; + set_buffer_mapped(bh_result); + kfree(multi); +out: + free_extent_map(em); + return ret; +} + +static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ /* btrfs_direct_IO: the handler this patch installs as .direct_IO in btrfs_aops. Returns -EINVAL when rw == WRITE; otherwise hands the request to blockdev_direct_IO() with btrfs_get_block as the block-mapping callback and NULL as the final argument. */ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return -EINVAL; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, btrfs_get_block, NULL); +} + static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { return extent_bmap(mapping, iblock, btrfs_get_extent); @@ -3136,6 +3200,7 @@ 
out_fail: btrfs_throttle(root); return err; } + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -3193,6 +3258,7 @@ static struct address_space_operations btrfs_aops = { .readpages = btrfs_readpages, .sync_page = block_sync_page, .bmap = btrfs_bmap, + .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, .set_page_dirty = __set_page_dirty_nobuffers, -- cgit v1.2.3-55-g7522