summaryrefslogblamecommitdiffstats
path: root/kernel/cache.c
blob: b47e32b78593a1730cbe82112f1af013b811ac8d (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                                                
                          































                                                                            





                                                         






























                                                  
                                                         

                                                    


                                              

























                                                                                       
                                                         
                 
              


























































































































































































































































































                                                                                          
/*
 * cache.c - block cache with red-black trees
 * Copyright (C) 2006 Thorsten Zitterell <thorsten@zitterell.de>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/file.h>
/* use the kernel's red-black tree library */
#include <linux/rbtree.h>
#include <linux/version.h>
#include <asm/uaccess.h>

#include "../common/dnbd-cliserv.h"
#include "cache.h"

/* map an embedded rb_node back to its enclosing cache_node */
#define rb_entry_cn(node)	rb_entry((node), struct cache_node, rb_node)

/* dummy operations used if no cache is configured */
/* No-op search handler installed while caching is disabled:
 * always reports a miss (0 sectors found). */
int dnbd_cache_dummy_search(dnbd_cache_t * cache, struct request *req)
{
	return 0;
}

/* No-op insert handler installed while caching is disabled:
 * silently discards the block. */
int dnbd_cache_dummy_insert(dnbd_cache_t * cache, sector_t block,
			    void *buf)
{
	return 0;
}

/* No-op cleanup handler installed while caching is disabled:
 * nothing was allocated, so nothing to free. */
void dnbd_cache_dummy_clean(dnbd_cache_t * cache)
{
}

/*
 * Look up the starting sector of @req in the red-black tree and, on a
 * hit, copy the cached block from the backing cache file into the
 * request's bio segments.
 *
 * Returns the number of SECTORS copied: 0 on a cache miss, on an
 * oversized segment, or on a read error from the cache file.
 *
 * Fix vs. original: the short-read check compared rbytes against
 * bio_iovec(req->bio)->bv_len — i.e. always the first segment of the
 * first bio — instead of the current segment's length; it now checks
 * the segment that was actually read. Also removed the dead
 * `buf = req->buffer` assignment (never used).
 */
int dnbd_cache_search(dnbd_cache_t * cache, struct request *req)
{
	/* hold segment as we copy from user space */
	mm_segment_t old_fs = get_fs();
	size_t blksize;
	void *kaddr;

	#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
		int i;
		struct bio *bio;
	#else
		struct req_iterator iter;
	#endif
	struct bio_vec *bvec;

	int result = 0, rbytes;
	struct rb_node *n;
	struct cache_node *cn;
	loff_t offset;

	down(&cache->sema);
	n = cache->root.rb_node;
	blksize = cache->blksize;

	/* binary search in the tree, keyed by starting sector */
	while (n) {
		cn = rb_entry_cn(n);

		if (req->sector < cn->rb_key)
			n = n->rb_left;
		else if (req->sector > cn->rb_key)
			n = n->rb_right;
		else
			goto found;
	}
	/* block is not cached */
	cache->miss++;
	goto out;

	/* cached block was found */
      found:
	cache->hits++;
	/* rb_data is the block index inside the cache file */
	offset = cn->rb_data * blksize;
	#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
	rq_for_each_bio(bio, req) {
		bio_for_each_segment(bvec, bio, i) {
	#else
	rq_for_each_segment(bvec, req, iter) {
	#endif
			if (bvec->bv_len > blksize) {
				printk(KERN_WARNING
				       "bvec->bv_len greater than cache block size\n");
				goto out;
			}
			/* copy cached block from cache file; widen the
			   address limit so vfs_read accepts a kernel
			   buffer, then restore it */
			set_fs(get_ds());
			kaddr = kmap(bvec->bv_page);
			rbytes =
			    vfs_read(cache->filp, kaddr + bvec->bv_offset,
				     bvec->bv_len, &offset);
			kunmap(bvec->bv_page);
			set_fs(old_fs);

			/* short read: the cache file is inconsistent;
			   report a miss so the caller re-fetches */
			if (rbytes != bvec->bv_len) {
				printk
				    ("dnbd: ERROR reading from cache!\n");
				result = 0;
				goto out;
			}

			result += rbytes;
			if (result == blksize)
				goto out;
	#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
		}
	#endif
	}

      out:
	up(&cache->sema);

	/* return number of copied sectors (512 bytes each) */
	return result >> 9;
}

/*
 * Insert the block for @sector into the cache.
 *
 * If the sector is already cached, the node is moved to the head of
 * the LRU list and 1 is returned.  Otherwise the block is written to
 * the cache file — evicting the least recently used block when the
 * cache is full — and a node is linked into the red-black tree and
 * the LRU list.
 *
 * Returns 1 if the sector was already cached, the number of bytes
 * written on a fresh insert (== cache->blksize on success), a short
 * byte count if the write failed, or -ENOMEM on allocation failure.
 *
 * Fixes vs. original:
 *  - eviction dereferenced cache->tail->prev unconditionally, which
 *    oopses when the cache holds exactly one block;
 *  - `parent` was stale from the first search loop if the re-search
 *    after rb_erase() found an empty tree, corrupting rb_link_node().
 */
int dnbd_cache_insert(dnbd_cache_t * cache, sector_t sector, void *buf)
{
	mm_segment_t old_fs = get_fs();
	int result = 0;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	cache_node_t *__cn, *cn;
	sector_t act_block;

	loff_t offset;

	down(&cache->sema);
	p = &cache->root.rb_node;

	/* red-black tree search and replacement in O(log n) */

	/* check if node was already inserted to cache and,
	   if necessary, do LRU replacement */

	while (*p) {
		parent = *p;
		__cn = rb_entry_cn(parent);
		if (sector < __cn->rb_key)
			p = &(*p)->rb_left;
		else if (sector > __cn->rb_key)
			p = &(*p)->rb_right;
		else {
			/* the sector was already added to cache */

			/* LRU replacement policy: unlink the node ... */
			if (__cn->prev)
				__cn->prev->next = __cn->next;
			else
				/* __cn is already head - do nothing */
				goto no_lru;

			if (__cn->next)
				__cn->next->prev = __cn->prev;
			else
				/* __cn is tail - so set new one */
				cache->tail =
				    (__cn->prev ? __cn->prev : __cn);

			/* ... and re-insert it as the new head:
			   no predecessor, old head as successor */

			__cn->prev = NULL;
			__cn->next = cache->head;

			if (cache->head)
				cache->head->prev = __cn;

			cache->head = __cn;

			cache->lru++;
		      no_lru:
			result = 1;
			goto out;
		}
	}

	/* check if cache is full */
	if (cache->used_blocks == cache->max_blocks) {
		/*
		   remove the LRU node (list tail), but keep its memory:
		   the node and its file block are reused for the new
		   sector
		 */
		cn = cache->tail;
		if (cn->prev) {
			cn->prev->next = NULL;
			cache->tail = cn->prev;
		} else {
			/* cn was the only node: the LRU list is empty now */
			cache->head = NULL;
			cache->tail = NULL;
		}

		/*
		   The node is erased from the tree, which is then
		   rebalanced.  Re-finding a parent for the new node is
		   mandatory; parent must be reset in case the tree is
		   now empty and the loop below never runs.
		 */
		act_block = cn->rb_data;
		rb_erase(&cn->rb_node, &cache->root);
		p = &cache->root.rb_node;
		parent = NULL;
		while (*p) {
			parent = *p;
			__cn = rb_entry_cn(parent);
			if (sector < __cn->rb_key)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
	} else {
		/* cache is not full, so reserve memory for a new node */
		cn = kmalloc(sizeof(*cn), GFP_KERNEL);
		if (!cn) {
			result = -ENOMEM;
			goto out;
		}
		act_block = cache->used_blocks;
		cache->used_blocks++;
	}

	/* write block to cache file at its block-index offset */
	offset = act_block * cache->blksize;
	set_fs(get_ds());
	result = vfs_write(cache->filp, buf, cache->blksize, &offset);
	set_fs(old_fs);

	if (result != cache->blksize) {
		printk("dnbd: ERROR writing to cache!\n");
		/* node is unlinked from tree and list in both paths,
		   so it can be freed and the block count rolled back */
		cache->used_blocks--;
		kfree(cn);
		goto out;
	}

	/* cn points to an unlinked node now - fill in the mapping */
	cn->rb_key = sector;
	cn->rb_data = act_block;

	/* new node becomes the head of the LRU list */
	cn->prev = NULL;
	cn->next = cache->head;

	/* adapt old head element - if it exists */
	if (cache->head)
		cache->head->prev = cn;

	/* set new head */
	cache->head = cn;

	/* first node is also the tail */
	if (!cache->tail)
		cache->tail = cn;

	/* call kernel helpers for red-black trees */
	rb_link_node(&cn->rb_node, parent, p);
	rb_insert_color(&cn->rb_node, &cache->root);

      out:
	up(&cache->sema);
	return result;
}

/*
 * Bring a cache descriptor into a defined, inactive state.
 * The no-op handlers are installed so insert/search/clean can be
 * called unconditionally before a cache file is configured.
 * Always returns 0.
 */
int dnbd_cache_init(dnbd_cache_t * cache)
{
	cache->active = 0;

	/* dummy handlers while no cache file is set */
	cache->insert = &dnbd_cache_dummy_insert;
	cache->search = &dnbd_cache_dummy_search;
	cache->clean = &dnbd_cache_dummy_clean;

	/* empty tree and empty LRU list */
	cache->root.rb_node = NULL;
	cache->head = NULL;
	cache->tail = NULL;

	/* no backing file yet */
	cache->filp = NULL;
	cache->fname = NULL;

	/* geometry and statistics start at zero */
	cache->max_blocks = 0;
	cache->used_blocks = 0;
	cache->blksize = 0;
	cache->hits = 0;
	cache->miss = 0;
	cache->lru = 0;

	spin_lock_init(&cache->lock);
	init_MUTEX(&cache->sema);

	return 0;
}

/*
 * Tear down the cache: free the file name and every node of the LRU
 * list, and mark the cache inactive.
 *
 * Fixes vs. original: cache->fname was freed but left dangling
 * (double-free on a repeated clean or a later dnbd_cache_set), and
 * the rbtree root still pointed at the freed nodes (use-after-free
 * if the cache was touched again).  Both pointers are now cleared.
 */
void dnbd_cache_clean(dnbd_cache_t * cache)
{
	cache_node_t *node;
	cache_node_t *tmp;
	int cnt = 0;

	spin_lock(&cache->lock);
	node = cache->head;

	cache->head = NULL;
	cache->tail = NULL;
	/* every node is reachable via the LRU list, so just drop the
	   tree root instead of walking the tree */
	cache->root.rb_node = NULL;
	cache->used_blocks = 0;

	/* kfree(NULL) is a no-op, no guard needed; clear the pointer
	   so it cannot be freed twice */
	kfree(cache->fname);
	cache->fname = NULL;

	/* free memory of all nodes; start with head */
	while (node) {
		tmp = node;
		node = node->next;
		kfree(tmp);
		cnt++;
	}
	printk(KERN_INFO "dnbd: freed %i cache nodes\n", cnt);

	cache->active = 0;
	spin_unlock(&cache->lock);
}

/*
 * Configure and activate the cache from a userspace description.
 * @arg points to a struct dnbd_file (file name + length) copied from
 * user space; @blksize is the cache block size in bytes.
 *
 * On success the cache file is opened, the block capacity derived
 * from its size, the real insert/search/clean handlers are installed,
 * and 0 is returned.  Returns -EFAULT, -ENOMEM or -EINVAL on failure.
 *
 * Fixes vs. original: a failed filp_open() left an ERR_PTR in
 * cache->filp, which the cleanup path then handed to fput() (oops);
 * cache->fname was left dangling after kfree on the error path; the
 * size_t blksize was printed with %i instead of %zu.
 */
int dnbd_cache_set(dnbd_cache_t * cache, struct dnbd_file __user * arg,
		   size_t blksize)
{
	int result = -EINVAL;
	struct dnbd_file cfile;
	struct kstat stat;
	loff_t div1;
	size_t div2;

	if (cache->active) {
		printk(KERN_ERR "dnbd: cachefile is already set!\n");
		return -EFAULT;
	}

	/* open, verify and set cache file */
	if (copy_from_user(&cfile, arg, sizeof(cfile)))
		return -EFAULT;

	if (!(cache->fname = (char *) kmalloc(cfile.len + 1, GFP_KERNEL)))
		return -ENOMEM;

	if (copy_from_user
	    (cache->fname, (void __user *) cfile.name, cfile.len)) {
		result = -EFAULT;
		goto out_free;
	}
	/* user string is not NUL-terminated - terminate it ourselves */
	*(cache->fname + cfile.len) = 0;

	printk(KERN_INFO "dnbd: setting cachefile to %s\n", cache->fname);

	cache->filp = filp_open(cache->fname, O_RDWR | O_LARGEFILE, 0);

	if (cache->filp == NULL || IS_ERR(cache->filp)) {
		/* never leave an ERR_PTR behind: the cleanup below (and
		   any later caller) would pass it to fput() */
		cache->filp = NULL;
		printk(KERN_ERR "dnbd: ERROR opening cache file!\n");
		result = -EINVAL;
		goto out_free;
	}

	generic_fillattr(cache->filp->f_dentry->d_inode, &stat);

	/* 64-bit division needs do_div on 32-bit platforms */
	div1 = stat.size;
	div2 = blksize;
	do_div(div1, div2);

	printk(KERN_INFO
	       "dnbd: cachefile size %llu KB using %llu blocks a %zu bytes for caching.\n",
	       stat.size >> 10, div1, blksize);

	cache->max_blocks = div1;
	cache->blksize = blksize;

	/* activate cache and adapt function for insert, search and clean up */
	cache->active = 1;
	cache->insert = &dnbd_cache_insert;
	cache->search = &dnbd_cache_search;
	cache->clean = &dnbd_cache_clean;

	result = 0;
	goto out;

      out_free:
	kfree(cache->fname);
	cache->fname = NULL;
      out:
	if (result < 0 && cache->filp)
		fput(cache->filp);
	return result;
}