summary | refs | log | tree | commit | diff | stats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 298
1 files changed, 210 insertions, 88 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c5af80c43d36..d0cf700bf201 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/filemap.c
*
@@ -280,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
* @pvec: pagevec with pages to delete
*
* The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
+ * from the mapping. The function expects @pvec to be sorted by page index.
* It tolerates holes in @pvec (mapping entries at those indices are not
* modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
*
* The function expects the i_pages lock to be held.
*/
@@ -293,44 +294,40 @@ static void page_cache_delete_batch(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
- int i = 0;
+ int i = 0, tail_pages = 0;
struct page *page;
mapping_set_update(&xas, mapping);
xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec))
+ if (i >= pagevec_count(pvec) && !tail_pages)
break;
-
- /* A swap/dax/shadow entry got inserted? Skip it. */
if (xa_is_value(page))
continue;
- /*
- * A page got inserted in our range? Skip it. We have our
- * pages locked so they are protected from being removed.
- * If we see a page whose index is higher than ours, it
- * means our page has been removed, which shouldn't be
- * possible because we're holding the PageLock.
- */
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
- page);
- continue;
- }
-
- WARN_ON_ONCE(!PageLocked(page));
-
- if (page->index == xas.xa_index)
+ if (!tail_pages) {
+ /*
+ * Some page got inserted in our range? Skip it. We
+ * have our pages locked so they are protected from
+ * being removed.
+ */
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index >
+ pvec->pages[i]->index, page);
+ continue;
+ }
+ WARN_ON_ONCE(!PageLocked(page));
+ if (PageTransHuge(page) && !PageHuge(page))
+ tail_pages = HPAGE_PMD_NR - 1;
page->mapping = NULL;
- /* Leave page->index set: truncation lookup relies on it */
-
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + (1UL << compound_order(page)) - 1 ==
- xas.xa_index)
+ /*
+ * Leave page->index set: truncation lookup relies
+ * upon it
+ */
i++;
+ } else {
+ VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
+ != pvec->pages[i]->index, page);
+ tail_pages--;
+ }
xas_store(&xas, NULL);
total_pages++;
}
@@ -553,6 +550,28 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
EXPORT_SYMBOL(filemap_fdatawait_range);
/**
+ * filemap_fdatawait_range_keep_errors - wait for writeback to complete
+ * @mapping: address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Walk the list of under-writeback pages of the given address space in the
+ * given range and wait for all of them. Unlike filemap_fdatawait_range(),
+ * this function does not clear error status of the address space.
+ *
+ * Use this function if callers don't handle errors themselves. Expected
+ * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
+ * fsfreeze(8)
+ *
+ * The wait itself is delegated to __filemap_fdatawait_range(); its outcome
+ * is not consulted directly.
+ *
+ * Return: whatever filemap_check_and_keep_errors() reports for @mapping
+ * once the wait has finished.
+ */
+int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ __filemap_fdatawait_range(mapping, start_byte, end_byte);
+ return filemap_check_and_keep_errors(mapping);
+}
+EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
+
+/**
* file_fdatawait_range - wait for writeback to complete
* @file: file pointing to address space structure to wait for
* @start_byte: offset in bytes where the range starts
@@ -1497,7 +1516,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
XA_STATE(xas, &mapping->i_pages, offset);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
repeat:
@@ -1512,19 +1531,25 @@ repeat:
if (!page || xa_is_value(page))
goto out;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
+ goto repeat;
+
+ /* The page was split under us? */
+ if (compound_head(page) != head) {
+ put_page(head);
goto repeat;
+ }
/*
- * Has the page moved or been split?
+ * Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != xas_reload(&xas))) {
- put_page(page);
+ put_page(head);
goto repeat;
}
- page = find_subpage(page, offset);
out:
rcu_read_unlock();
@@ -1706,6 +1731,7 @@ unsigned find_get_entries(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, page, ULONG_MAX) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1716,13 +1742,17 @@ unsigned find_get_entries(struct address_space *mapping,
if (xa_is_value(page))
goto export;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
@@ -1731,7 +1761,7 @@ export:
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1773,27 +1803,33 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
rcu_read_lock();
xas_for_each(&xas, page, end) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1838,6 +1874,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1847,19 +1884,24 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
if (xa_is_value(page))
break;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages)
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1895,6 +1937,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
rcu_read_lock();
xas_for_each_marked(&xas, page, end, tag) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1905,21 +1948,26 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*index = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -2456,10 +2504,8 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
*
* vma->vm_mm->mmap_sem must be held on entry.
*
- * If our return value has VM_FAULT_RETRY set, it's because
- * lock_page_or_retry() returned 0.
- * The mmap_sem has usually been released in this case.
- * See __lock_page_or_retry() for the exception.
+ * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem
+ * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
*
* If our return value does not have VM_FAULT_RETRY set, the mmap_sem
* has not been released.
@@ -2602,7 +2648,7 @@ void filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2611,19 +2657,24 @@ void filemap_map_pages(struct vm_fault *vmf,
if (xa_is_value(page))
goto next;
+ head = compound_head(page);
+
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(page))
+ if (PageLocked(head))
goto next;
- if (!page_cache_get_speculative(page))
+ if (!page_cache_get_speculative(head))
goto next;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto skip;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto skip;
- page = find_subpage(page, xas.xa_index);
if (!PageUptodate(page) ||
PageReadahead(page) ||
@@ -2772,7 +2823,11 @@ repeat:
}
filler:
- err = filler(data, page);
+ if (filler)
+ err = filler(data, page);
+ else
+ err = mapping->a_ops->readpage(data, page);
+
if (err < 0) {
put_page(page);
return ERR_PTR(err);
@@ -2862,7 +2917,8 @@ struct page *read_cache_page(struct address_space *mapping,
int (*filler)(void *, struct page *),
void *data)
{
- return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+ return do_read_cache_page(mapping, index, filler, data,
+ mapping_gfp_mask(mapping));
}
EXPORT_SYMBOL(read_cache_page);
@@ -2883,9 +2939,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
pgoff_t index,
gfp_t gfp)
{
- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
-
- return do_read_cache_page(mapping, index, filler, NULL, gfp);
+ return do_read_cache_page(mapping, index, NULL, NULL, gfp);
}
EXPORT_SYMBOL(read_cache_page_gfp);
@@ -2894,24 +2948,11 @@ EXPORT_SYMBOL(read_cache_page_gfp);
* LFS limits. If pos is under the limit it becomes a short access. If it
* exceeds the limit we return -EFBIG.
*/
-static int generic_access_check_limits(struct file *file, loff_t pos,
- loff_t *count)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t max_size = inode->i_sb->s_maxbytes;
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = MAX_NON_LFS;
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
- *count = min(*count, max_size - pos);
- return 0;
-}
-
static int generic_write_check_limits(struct file *file, loff_t pos,
loff_t *count)
{
+ struct inode *inode = file->f_mapping->host;
+ loff_t max_size = inode->i_sb->s_maxbytes;
loff_t limit = rlimit(RLIMIT_FSIZE);
if (limit != RLIM_INFINITY) {
@@ -2922,7 +2963,15 @@ static int generic_write_check_limits(struct file *file, loff_t pos,
*count = min(*count, limit - pos);
}
- return generic_access_check_limits(file, pos, count);
+ if (!(file->f_flags & O_LARGEFILE))
+ max_size = MAX_NON_LFS;
+
+ if (unlikely(pos >= max_size))
+ return -EFBIG;
+
+ *count = min(*count, max_size - pos);
+
+ return 0;
}
/*
@@ -2962,7 +3011,7 @@ EXPORT_SYMBOL(generic_write_checks);
/*
* Performs necessary checks before doing a clone.
*
- * Can adjust amount of bytes to clone.
+ * Can adjust amount of bytes to clone via @req_count argument.
* Returns appropriate error code that caller should return or
* zero in case the clone should be allowed.
*/
@@ -3000,10 +3049,6 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
return -EINVAL;
count = min(count, size_in - (uint64_t)pos_in);
- ret = generic_access_check_limits(file_in, pos_in, &count);
- if (ret)
- return ret;
-
ret = generic_write_check_limits(file_out, pos_out, &count);
if (ret)
return ret;
@@ -3040,6 +3085,83 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
return 0;
}
+
+/*
+ * Performs common checks before doing a file copy/clone
+ * from @file_in to @file_out.
+ *
+ * The checks are applied in this order, and the first failure wins:
+ *  -EISDIR if either inode is a directory;
+ *  -EINVAL if either inode is not a regular file;
+ *  -EBADF  if @file_in lacks FMODE_READ, @file_out lacks FMODE_WRITE,
+ *          or @file_out has O_APPEND set in f_flags.
+ * Returns zero when every check passes.
+ */
+int generic_file_rw_checks(struct file *file_in, struct file *file_out)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+
+ /* Don't copy dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ if (!(file_in->f_mode & FMODE_READ) ||
+ !(file_out->f_mode & FMODE_WRITE) ||
+ (file_out->f_flags & O_APPEND))
+ return -EBADF;
+
+ return 0;
+}
+
+/*
+ * Performs necessary checks before doing a file copy
+ *
+ * Can adjust amount of bytes to copy via @req_count argument.
+ * Returns appropriate error code that caller should return or
+ * zero in case the copy should be allowed.
+ *
+ * Checks run in this order, and the first failure is returned:
+ *  - the common generic_file_rw_checks() on both files;
+ *  - -EPERM if the @file_out inode is immutable;
+ *  - -ETXTBSY if either inode is a swapfile;
+ *  - -EOVERFLOW if @pos_in or @pos_out plus the requested count wraps.
+ * The count is then shortened to the end of @file_in (it becomes zero when
+ * @pos_in is at or beyond i_size) and passed through
+ * generic_write_check_limits() for @file_out, which may shorten it further
+ * or fail. Finally -EINVAL is returned if the source and destination ranges
+ * overlap within the same inode.
+ *
+ * @req_count is written back only on the success path; on any error it is
+ * left untouched. @flags is not examined by this function.
+ */
+int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t *req_count, unsigned int flags)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ uint64_t count = *req_count;
+ loff_t size_in;
+ int ret;
+
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret)
+ return ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Ensure offsets don't wrap. */
+ if (pos_in + count < pos_in || pos_out + count < pos_out)
+ return -EOVERFLOW;
+
+ /* Shorten the copy to EOF */
+ size_in = i_size_read(inode_in);
+ if (pos_in >= size_in)
+ count = 0;
+ else
+ count = min(count, size_in - (uint64_t)pos_in);
+
+ ret = generic_write_check_limits(file_out, pos_out, &count);
+ if (ret)
+ return ret;
+
+ /* Don't allow overlapped copying within the same file. */
+ if (inode_in == inode_out &&
+ pos_out + count > pos_in &&
+ pos_out < pos_in + count)
+ return -EINVAL;
+
+ *req_count = count;
+ return 0;
+}
+
int pagecache_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)