/*
 * cache.c - block cache with red-black trees
 * Copyright (C) 2006 Thorsten Zitterell
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/file.h>

/* use red-black tree library of the kernel */
#include <linux/rbtree.h>

#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/version.h>
#include <asm/uaccess.h>

#include "../common/dnbd-cliserv.h"
#include "cache.h"

/* map an rb_node to its embedding cache_node */
#define rb_entry_cn(node)	rb_entry((node), struct cache_node, rb_node)

/* dummy operations if no cache is used */
int dnbd_cache_dummy_search(dnbd_cache_t * cache, struct request *req)
{
	return 0;
}

int dnbd_cache_dummy_insert(dnbd_cache_t * cache, sector_t block, void *buf)
{
	return 0;
}

void dnbd_cache_dummy_clean(dnbd_cache_t * cache)
{
	return;
}

int dnbd_cache_search(dnbd_cache_t * cache, struct request *req)
{
	/* save the address limit: vfs_read() is given a kernel buffer below */
	mm_segment_t old_fs = get_fs();
	size_t blksize;
	void *kaddr;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
	int i;
	struct bio *bio;
#else
	struct req_iterator iter;
#endif
	struct bio_vec *bvec;
	int result = 0, rbytes;
	struct rb_node *n;
	struct cache_node *cn;
	loff_t offset;

	down(&cache->sema);

	n = cache->root.rb_node;
	blksize = cache->blksize;

	/* search for block */
	while (n) {
		cn = rb_entry_cn(n);

		if (req->sector < cn->rb_key)
			n = n->rb_left;
		else if (req->sector > cn->rb_key)
			n = n->rb_right;
		else
			goto found;
	}
	/* block is not cached */
	cache->miss++;
	goto out;

found:
	/* cached block was found */
	cache->hits++;
	offset = cn->rb_data * blksize;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
	rq_for_each_bio(bio, req) {
		bio_for_each_segment(bvec, bio, i) {
#else
	rq_for_each_segment(bvec, req, iter) {
#endif
		if (bvec->bv_len > blksize) {
			printk(KERN_WARNING
			       "dnbd: bvec->bv_len greater than cache block size\n");
			goto out;
		}
		/* copy cached block from the cache file into the request page */
		set_fs(get_ds());
		kaddr = kmap(bvec->bv_page);
		rbytes = vfs_read(cache->filp, kaddr + bvec->bv_offset,
				  bvec->bv_len, &offset);
		kunmap(bvec->bv_page);
		set_fs(old_fs);

		/* error on reading? */
		if (rbytes != bvec->bv_len) {
			printk(KERN_ERR "dnbd: ERROR reading from cache!\n");
			result = 0;
			goto out;
		}

		result += rbytes;
		if (result == blksize)
			goto out;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
		}
#endif
	}
out:
	up(&cache->sema);
	/* return number of copied sectors */
	return result >> 9;
}

int dnbd_cache_insert(dnbd_cache_t * cache, sector_t sector, void *buf)
{
	mm_segment_t old_fs = get_fs();
	int result = 0;
	int newnode = 0;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	cache_node_t *__cn, *cn;
	sector_t act_block;
	loff_t offset;

	down(&cache->sema);

	p = &cache->root.rb_node;

	/*
	 * red-black tree search and replacement in O(log n): check if the
	 * block was already inserted into the cache and, if so, only do
	 * the LRU bookkeeping
	 */
	while (*p) {
		parent = *p;
		__cn = rb_entry_cn(parent);

		if (sector < __cn->rb_key)
			p = &(*p)->rb_left;
		else if (sector > __cn->rb_key)
			p = &(*p)->rb_right;
		else {
			/* the sector was already added to the cache */
			/* LRU replacement policy: move the node to the head */
			if (__cn->prev)
				__cn->prev->next = __cn->next;
			else
				/* __cn is the head already - do nothing */
				goto no_lru;

			if (__cn->next)
				__cn->next->prev = __cn->prev;
			else
				/* __cn is the tail - its predecessor becomes
				   the new tail (__cn->prev is non-NULL here,
				   the head case was handled above) */
				cache->tail = __cn->prev;

			/*
			 * insert node at the head: the head of the list has
			 * no predecessor, so set the previous node to NULL,
			 * the next node to the old head, and make __cn the
			 * new head
			 */
			__cn->prev = NULL;
			__cn->next = cache->head;
			if (cache->head)
				cache->head->prev = __cn;
			cache->head = __cn;
			cache->lru++;
no_lru:
			result = 1;
			goto out;
		}
	}

	/* check if cache is full */
	if (cache->used_blocks == cache->max_blocks) {
		/* remove the LRU node (cn), but reuse its data structure */
		cn = cache->tail;
		if (cn->prev) {
			cn->prev->next = NULL;
			cache->tail = cn->prev;
		} else {
			/* the evicted node was the only element in the list */
			cache->head = NULL;
			cache->tail = NULL;
		}

		/*
		 * The node (the tail of the LRU list) is erased from the
		 * tree, which is then rebalanced. Re-finding a parent for
		 * the node is mandatory.
		 */
		act_block = cn->rb_data;
		rb_erase(&cn->rb_node, &cache->root);

		parent = NULL;
		p = &cache->root.rb_node;
		while (*p) {
			parent = *p;
			__cn = rb_entry_cn(parent);
			if (sector < __cn->rb_key)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
	} else {
		/* cache is not full, so reserve memory for a tree node */
		if (!(cn = (cache_node_t *)
		      kmalloc(sizeof(cache_node_t), GFP_KERNEL))) {
			result = -ENOMEM;
			goto out;
		}
		newnode = 1;
		act_block = cache->used_blocks;
		/* cn = &cache->nodes[act_block]; */
		cache->used_blocks++;
	}

	/* write block to cache file */
	offset = act_block * cache->blksize;
	set_fs(get_ds());
	result = vfs_write(cache->filp, buf, cache->blksize, &offset);
	set_fs(old_fs);

	if (result != cache->blksize) {
		printk(KERN_ERR "dnbd: ERROR writing to cache!\n");
		/* roll back the block counter only for a freshly allocated
		   node; an evicted node is already unlinked and just freed */
		if (newnode)
			cache->used_blocks--;
		kfree(cn);
		goto out;
	}

	/* cn points to an unused node now */
	cn->rb_key = sector;
	cn->rb_data = act_block;

	/* the current node (which becomes the new head) has no predecessor */
	cn->prev = NULL;
	cn->next = cache->head;
	/* adapt the old head element - if it exists */
	if (cache->head)
		cache->head->prev = cn;
	/* set new head */
	cache->head = cn;
	/* set new tail - if the list was empty */
	if (!cache->tail)
		cache->tail = cn;

	/* call kernel helpers for red-black trees */
	rb_link_node(&cn->rb_node, parent, p);
	rb_insert_color(&cn->rb_node, &cache->root);
out:
	up(&cache->sema);
	return result;
}
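/*
 * Overview of the two structures above (summary added for clarity,
 * derived from the code in this file): every cached block is linked
 * into a red-black tree keyed by sector (O(log n) lookup in search and
 * insert) and into a doubly linked list ordered by recency (O(1)
 * eviction):
 *
 *	cache->head (MRU) <-> cn <-> ... <-> cache->tail (LRU, evicted first)
 *
 * For each cache_node, rb_key holds the device sector and rb_data the
 * block index inside the cache file, so the file offset of a cached
 * block is always rb_data * cache->blksize.
 */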
int dnbd_cache_init(dnbd_cache_t * cache)
{
	/* initialize cache as inactive */
	cache->active = 0;

	/* set dummy functions - used as long as no cache file is set */
	cache->insert = &dnbd_cache_dummy_insert;
	cache->search = &dnbd_cache_dummy_search;
	cache->clean = &dnbd_cache_dummy_clean;

	cache->root.rb_node = NULL;
	cache->max_blocks = 0;
	cache->used_blocks = 0;
	cache->blksize = 0;
	cache->hits = 0;
	cache->miss = 0;
	cache->lru = 0;
	cache->filp = NULL;
	cache->fname = NULL;
	cache->head = NULL;
	cache->tail = NULL;

	spin_lock_init(&cache->lock);
	init_MUTEX(&cache->sema);

	return 0;
}

void dnbd_cache_clean(dnbd_cache_t * cache)
{
	cache_node_t *node;
	cache_node_t *tmp;
	int cnt = 0;

	spin_lock(&cache->lock);

	node = cache->head;
	cache->head = NULL;
	cache->tail = NULL;
	cache->root.rb_node = NULL;
	cache->used_blocks = 0;

	if (cache->fname) {
		kfree(cache->fname);
		cache->fname = NULL;
	}

	/* free memory of all nodes; start with the head */
	while (node) {
		tmp = node;
		node = node->next;
		kfree(tmp);
		cnt++;
	}
	printk(KERN_INFO "dnbd: freed %i cache nodes\n", cnt);

	cache->active = 0;
	spin_unlock(&cache->lock);
}
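/*
 * Locking note (an observation, not from the original comments):
 * dnbd_cache_search() and dnbd_cache_insert() serialize on cache->sema,
 * since vfs_read()/vfs_write() may sleep; dnbd_cache_clean() only takes
 * cache->lock and therefore does not exclude a concurrently running
 * search or insert. Callers presumably have to ensure that no requests
 * are in flight before cleaning up the cache.
 */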
int dnbd_cache_set(dnbd_cache_t * cache, struct dnbd_file __user * arg,
		   size_t blksize)
{
	int result = -EINVAL;
	struct dnbd_file cfile;
	struct kstat stat;
	loff_t div1;
	size_t div2;

	if (cache->active) {
		printk(KERN_ERR "dnbd: cachefile is already set!\n");
		return -EFAULT;
	}

	/* open, verify and set cache file */
	if (copy_from_user(&cfile, arg, sizeof(cfile)))
		return -EFAULT;

	if (!(cache->fname = (char *) kmalloc(cfile.len + 1, GFP_KERNEL)))
		return -ENOMEM;

	if (copy_from_user(cache->fname, (void __user *) cfile.name,
			   cfile.len)) {
		result = -EFAULT;
		goto out_free;
	}
	cache->fname[cfile.len] = '\0';

	printk(KERN_INFO "dnbd: setting cachefile to %s\n", cache->fname);

	cache->filp = filp_open(cache->fname, O_RDWR | O_LARGEFILE, 0);
	if (cache->filp == NULL || IS_ERR(cache->filp)) {
		printk(KERN_ERR "dnbd: ERROR opening cache file!\n");
		/* do not keep the ERR_PTR around - it must never be fput() */
		cache->filp = NULL;
		result = -EINVAL;
		goto out_free;
	}

	generic_fillattr(cache->filp->f_dentry->d_inode, &stat);

	div1 = stat.size;
	div2 = blksize;
	do_div(div1, div2);

	printk(KERN_INFO
	       "dnbd: cachefile size %llu KB, using %llu blocks of %zu bytes for caching.\n",
	       (unsigned long long) (stat.size >> 10),
	       (unsigned long long) div1, blksize);

	cache->max_blocks = div1;
	cache->blksize = blksize;

	/* activate cache and adapt functions for insert, search and clean up */
	cache->active = 1;
	cache->insert = &dnbd_cache_insert;
	cache->search = &dnbd_cache_search;
	cache->clean = &dnbd_cache_clean;

	result = 0;
	goto out;

out_free:
	kfree(cache->fname);
	cache->fname = NULL;
out:
	if (result < 0 && cache->filp) {
		fput(cache->filp);
		cache->filp = NULL;
	}
	return result;
}
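/*
 * Illustrative usage sketch (hypothetical caller, not part of this file;
 * `dev`, `uarg` and the 4096 byte block size are assumptions made up for
 * the example, the entry points themselves are the ones defined above):
 *
 *	dnbd_cache_init(&dev->cache);                // dummy ops, inactive
 *	dnbd_cache_set(&dev->cache, uarg, 4096);     // e.g. from an ioctl
 *	...
 *	dev->cache.insert(&dev->cache, sector, buf); // store received block
 *	if (dev->cache.search(&dev->cache, req) > 0)
 *		;  // at least some sectors were served from the cache
 *	...
 *	dev->cache.clean(&dev->cache);               // free nodes on shutdown
 */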