summary refs log tree commit diff stats
path: root/drivers/staging/erofs/utils.c
diff options
context:
space:
mode:
author Gao Xiang 2018-07-26 14:22:06 +0200
committer Greg Kroah-Hartman 2018-07-27 17:24:10 +0200
commit 3883a79abd02272222a214a5f84395d41eecdc84 (patch)
tree 20b11dc7d7ddf0cb76f66523acc27d91c5329459 /drivers/staging/erofs/utils.c
parent staging: erofs: introduce workstation for decompression (diff)
download kernel-qcow2-linux-3883a79abd02272222a214a5f84395d41eecdc84.tar.gz
kernel-qcow2-linux-3883a79abd02272222a214a5f84395d41eecdc84.tar.xz
kernel-qcow2-linux-3883a79abd02272222a214a5f84395d41eecdc84.zip
staging: erofs: introduce VLE decompression support
This patch introduces the basic in-place VLE decompression implementation for the erofs file system.

Compared with fixed-sized input compression, it implements what we call 'variable-length extent compression', which specifies the same output size for each compression block in order to make full use of I/O bandwidth (which means almost all data read from the block device can be used directly for decompression), to improve real random read performance (rather than improving it only via data caching, which costs more memory) and to keep the relatively lower compression ratios (it saves more storage space than fixed-sized input compression configured with the same input block size), as illustrated below:

        |---  variable-length extent ---|------ VLE ------|---  VLE ---|
         /> clusterofs                   /> clusterofs     /> clusterofs /> clusterofs
   ++---|-------++-----------++---------|-++-----------++-|---------++-|
...||   |       ||           ||         | ||           || |         || | ... original data
   ++---|-------++-----------++---------|-++-----------++-|---------++-|
   ++->cluster<-++->cluster<-++->cluster<-++->cluster<-++->cluster<-++
        size         size         size         size         size
         \                           /                /            /
          \                      /               /            /
           \                 /              /            /
            ++-----------++-----------++-----------++
        ... ||           ||           ||           || ... compressed clusters
            ++-----------++-----------++-----------++
            ++->cluster<-++->cluster<-++->cluster<-++
                 size         size         size

The main point of 'in-place' refers to the decompression mode: instead of allocating independent compressed pages and data structures, it reuses the already-allocated file cache pages as much as possible to store the compressed data and the corresponding pagevec in a time-sharing approach by default, which is useful in low-memory scenarios.
In the end, unlike other filesystems with (de)compression support that use a relatively large compression block size, which read and decompress >= 128KB at once and so gain better-looking random read figures (in fact they collect small random reads into large sequential reads and cache all decompressed data in memory, which is unacceptable especially for embedded devices with limited memory, and is not real random read), we select a universal small-sized 4KB compressed cluster, which is the smallest page size for most architectures, and all compressed clusters can be read and decompressed independently, which ensures random read performance for all use cases.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging/erofs/utils.c')
-rw-r--r-- drivers/staging/erofs/utils.c 61
1 files changed, 60 insertions, 1 deletions
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 0d4eae2f79a8..6530035f8a61 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -12,6 +12,7 @@
*/
#include "internal.h"
+#include <linux/pagevec.h>
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
@@ -98,11 +99,69 @@ int erofs_register_workgroup(struct super_block *sb,
return err;
}
+extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
+/*
+ * erofs_workgroup_put - drop one reference on a workgroup
+ * @grp: workgroup whose refcount is decremented
+ *
+ * Returns the refcount value observed right after the decrement.
+ *
+ * When the count becomes 1, erofs_global_shrink_cnt is incremented; when it
+ * becomes 0, erofs_global_shrink_cnt is decremented again and the workgroup
+ * is handed to erofs_workgroup_free_rcu().
+ *
+ * NOTE(review): a count of 1 presumably means only the workstation tree
+ * still references the group, i.e. it has become reclaimable - confirm
+ * against erofs_register_workgroup().
+ */
+int erofs_workgroup_put(struct erofs_workgroup *grp)
+{
+ int count = atomic_dec_return(&grp->refcount);
+
+ if (count == 1)
+ atomic_long_inc(&erofs_global_shrink_cnt);
+ else if (!count) {
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ erofs_workgroup_free_rcu(grp);
+ }
+ return count;
+}
+/*
+ * erofs_shrink_workstation - release workgroups held in sbi->workstn_tree
+ * @sbi: filesystem instance whose workstation tree is scanned
+ * @nr_shrink: number of workgroups to release before stopping
+ * @cleanup: if true, every workgroup found must have a refcount of exactly 1
+ *           (anything else hits BUG_ON); if false, workgroups whose refcount
+ *           is above 1 are skipped
+ *
+ * Scans the tree in batches of up to PAGEVEC_SIZE entries while holding the
+ * workstation lock, deletes each eligible workgroup from the tree and drops
+ * one reference on it.  Returns the number of workgroups released this way.
+ *
+ * NOTE(review): nr_shrink is pre-decremented before it is tested, so a
+ * caller passing 0 would wrap around instead of stopping - confirm callers
+ * never pass 0.
+ * NOTE(review): freed is declared unsigned while the function returns
+ * unsigned long - confirm the narrower counter is intended.
+ */
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink,
bool cleanup)
{
- return 0;
+ pgoff_t first_index = 0;
+ void *batch[PAGEVEC_SIZE];
+ unsigned freed = 0;
+
+ int i, found;
+repeat:
+ erofs_workstn_lock(sbi);
+ /* fetch the next batch of entries, starting the search at first_index */
+ found = radix_tree_gang_lookup(&sbi->workstn_tree,
+ batch, first_index, PAGEVEC_SIZE);
+
+ for (i = 0; i < found; ++i) {
+ int cnt;
+ struct erofs_workgroup *grp = (void *)
+ ((unsigned long)batch[i] &
+ ~RADIX_TREE_EXCEPTIONAL_ENTRY); /* clear the exceptional-entry bit to get the pointer */
+
+ first_index = grp->index + 1; /* the next lookup resumes just past this entry */
+
+ cnt = atomic_read(&grp->refcount);
+ BUG_ON(cnt <= 0);
+ /* cleanup mode insists on a refcount of exactly 1; otherwise busy groups are skipped */
+ if (cleanup)
+ BUG_ON(cnt != 1);
+
+ else if (cnt > 1)
+ continue;
+ /*
+  * radix_tree_delete() hands back the entry it removed; anything other
+  * than grp is not released here.
+  * NOTE(review): if entries are stored with the exceptional bit set, the
+  * returned value would differ from the untagged grp - confirm how
+  * erofs_register_workgroup() inserts them.
+  */
+ if (radix_tree_delete(&sbi->workstn_tree,
+ grp->index) != grp)
+ continue;
+
+ /* (rarely) grabbed again when freeing */
+ erofs_workgroup_put(grp);
+
+ ++freed;
+ if (unlikely(!--nr_shrink))
+ break;
+ }
+ erofs_workstn_unlock(sbi);
+ /* rescan while the last pass advanced (i != 0) and nr_shrink has not reached zero */
+ if (i && nr_shrink)
+ goto repeat;
+ return freed;
}
#endif