From 432d268c0552fd30c8be564f7ea2504a2b546101 Mon Sep 17 00:00:00 2001
From: Jun Nakajima
Date: Tue, 31 Aug 2010 16:41:25 +0100
Subject: xen: Introduce the Xen mapcache

On an IA32 host or an IA32 PAE host we currently cannot, in general,
create an HVM guest with more than 2G of memory, because it is almost
impossible for QEMU to find a large enough and consecutive virtual
address space in which to map the HVM guest's whole physical address
space.

This patch fixes the issue by using dynamic mapping based on small
blocks of memory. Each call to qemu_get_ram_ptr makes a call to
qemu_map_cache with the lock option, so the mapcache will not unmap
these ram_ptrs.

Blocks that do not belong to RAM, but usually to a device ROM or to a
framebuffer, are handled in a separate function, so the whole RAMBlock
can be mapped.

Signed-off-by: Jun Nakajima
Signed-off-by: Anthony PERARD
Signed-off-by: Stefano Stabellini
Signed-off-by: Alexander Graf
---
 xen-mapcache.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)
 create mode 100644 xen-mapcache.c

diff --git a/xen-mapcache.c b/xen-mapcache.c
new file mode 100644
index 0000000000..a539358fb7
--- /dev/null
+++ b/xen-mapcache.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (C) 2011 Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "config.h"
+
+#include <sys/resource.h>
+
+#include "hw/xen_backend.h"
+#include "blockdev.h"
+
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#include "xen-mapcache.h"
+#include "trace.h"
+
+
+//#define MAPCACHE_DEBUG
+
+#ifdef MAPCACHE_DEBUG
+# define DPRINTF(fmt, ...) do { \
+    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+#else
+# define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#if defined(__i386__)
+# define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+# define MCACHE_BUCKET_SHIFT 20
+#endif
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef struct MapCacheEntry {
+    target_phys_addr_t paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+    uint8_t *vaddr_req;
+    target_phys_addr_t paddr_index;
+    QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+    MapCacheEntry *entry;
+    unsigned long nr_buckets;
+    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+    /* For most cases (>99.9%), the page address is the same. */
+    target_phys_addr_t last_address_index;
+    uint8_t *last_address_vaddr;
+    unsigned long max_mcache_size;
+    unsigned int mcache_bucket_shift;
+} MapCache;
+
+static MapCache *mapcache;
+
+static inline int test_bit(unsigned int bit, const unsigned long *map)
+{
+    return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)));
+}
+
+void qemu_map_cache_init(void)
+{
+    unsigned long size;
+    struct rlimit rlimit_as;
+
+    mapcache = qemu_mallocz(sizeof (MapCache));
+
+    QTAILQ_INIT(&mapcache->locked_entries);
+    mapcache->last_address_index = -1;
+
+    getrlimit(RLIMIT_AS, &rlimit_as);
+    rlimit_as.rlim_cur = rlimit_as.rlim_max;
+    setrlimit(RLIMIT_AS, &rlimit_as);
+    mapcache->max_mcache_size = rlimit_as.rlim_max;
+
+    mapcache->nr_buckets =
+        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
+          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size);
+    mapcache->entry = qemu_mallocz(size);
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+                              target_phys_addr_t size,
+                              target_phys_addr_t address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t *pfns;
+    int *err;
+    unsigned int i, j;
+    target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+    trace_qemu_remap_bucket(address_index);
+
+    pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+    err = qemu_mallocz(nb_pfn * sizeof (int));
+
+    if (entry->vaddr_base != NULL) {
+        if (munmap(entry->vaddr_base, size) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < nb_pfn; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err, nb_pfn);
+    if (vaddr_base == NULL) {
+        perror("xc_map_foreign_bulk");
+        exit(-1);
+    }
+
+    entry->vaddr_base = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < nb_pfn; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        if ((i + BITS_PER_LONG) > nb_pfn) {
+            j = nb_pfn % BITS_PER_LONG;
+        } else {
+            j = BITS_PER_LONG;
+        }
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+
+    qemu_free(pfns);
+    qemu_free(err);
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    MapCacheEntry *entry, *pentry = NULL;
+    target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT;
+    target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+
+    trace_qemu_map_cache(phys_addr);
+
+    if (address_index == mapcache->last_address_index && !lock) {
+        trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset);
+        return mapcache->last_address_vaddr + address_offset;
+    }
+
+    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+    while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof (MapCacheEntry));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index ||
+            !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+            qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+        }
+    }
+
+    if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+        mapcache->last_address_index = -1;
+        trace_qemu_map_cache_return(NULL);
+        return NULL;
+    }
+
+    mapcache->last_address_index = address_index;
+    mapcache->last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+        entry->lock++;
+        reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+        reventry->paddr_index = mapcache->last_address_index;
+        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
+    }
+
+    trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset);
+    return mapcache->last_address_vaddr + address_offset;
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == ptr) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index,
+                    reventry->vaddr_req);
+        }
+        abort();
+        return 0;
+    }
+
+    return paddr_index << MCACHE_BUCKET_SHIFT;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    if (mapcache->last_address_vaddr == buffer) {
+        mapcache->last_address_index = -1;
+    }
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL) {
+        return;
+    }
+
+    pentry->next = entry->next;
+    if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+        perror("unmap fails");
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    MapCacheRev *reventry;
+
+    /* Flush pending AIO before destroying the mapcache */
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        DPRINTF("There should be no locked mappings at this time, "
+                "but "TARGET_FMT_plx" -> %p is present\n",
+                reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < mapcache->nr_buckets; i++) {
+        MapCacheEntry *entry = &mapcache->entry[i];
+
+        if (entry->vaddr_base == NULL) {
+            continue;
+        }
+
+        if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base = NULL;
+    }
+
+    mapcache->last_address_index = -1;
+    mapcache->last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+
+uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t *pfns;
+    int *err;
+    unsigned int i;
+    target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+    trace_xen_map_block(phys_addr, size);
+    phys_addr >>= XC_PAGE_SHIFT;
+
+    pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+    err = qemu_mallocz(nb_pfn * sizeof (int));
+
+    for (i = 0; i < nb_pfn; i++) {
+        pfns[i] = phys_addr + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err, nb_pfn);
+    if (vaddr_base == NULL) {
+        perror("xc_map_foreign_bulk");
+        exit(-1);
+    }
+
+    qemu_free(pfns);
+    qemu_free(err);
+
+    return vaddr_base;
+}

From ea6c5f8ffe6de12e04e63acbb9937683b30216e2 Mon Sep 17 00:00:00 2001
From: John Baboval
Date: Tue, 22 Mar 2011 14:50:28 +0000
Subject: xen: Add a cap to the number of map cache entries.

Add a cap to the number of map cache entries. This prevents the map
cache from overwhelming system memory.

I also removed the bitmap macros and #included bitmap.h instead.

Signed-off-by: John Baboval
Signed-off-by: Anthony PERARD
Signed-off-by: Alexander Graf
---
 xen-mapcache.c | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/xen-mapcache.c b/xen-mapcache.c
index a539358fb7..2ca18ce3b5 100644
--- a/xen-mapcache.c
+++ b/xen-mapcache.c
@@ -12,6 +12,7 @@

 #include "hw/xen_backend.h"
 #include "blockdev.h"
+#include "bitmap.h"

 #include <xen/hvm/params.h>
 #include <sys/mman.h>
@@ -32,15 +33,13 @@

 #if defined(__i386__)
 # define MCACHE_BUCKET_SHIFT 16
+# define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
 #elif defined(__x86_64__)
 # define MCACHE_BUCKET_SHIFT 20
+# define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */
 #endif
 #define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)

-#define BITS_PER_LONG (sizeof(long) * 8)
-#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
-#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
-
 typedef struct MapCacheEntry {
     target_phys_addr_t paddr_index;
     uint8_t *vaddr_base;
@@ -69,11 +68,6 @@ typedef struct MapCache {

 static MapCache *mapcache;

-static inline int test_bit(unsigned int bit, const unsigned long *map)
-{
-    return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)));
-}
-
 void qemu_map_cache_init(void)
 {
     unsigned long size;
@@ -85,9 +79,14 @@ void qemu_map_cache_init(void)
     mapcache->last_address_index = -1;

     getrlimit(RLIMIT_AS, &rlimit_as);
-    rlimit_as.rlim_cur = rlimit_as.rlim_max;
+    if (rlimit_as.rlim_max < MCACHE_MAX_SIZE) {
+        rlimit_as.rlim_cur = rlimit_as.rlim_max;
+    } else {
+        rlimit_as.rlim_cur = MCACHE_MAX_SIZE;
+    }
+
     setrlimit(RLIMIT_AS, &rlimit_as);
-    mapcache->max_mcache_size = rlimit_as.rlim_max;
+    mapcache->max_mcache_size = rlimit_as.rlim_cur;

     mapcache->nr_buckets =
         (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
@@ -107,7 +106,7 @@ static void qemu_remap_bucket(MapCacheEntry *entry,
     uint8_t *vaddr_base;
     xen_pfn_t *pfns;
     int *err;
-    unsigned int i, j;
+    unsigned int i;
     target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;

     trace_qemu_remap_bucket(address_index);
@@ -136,17 +135,11 @@ static void qemu_remap_bucket(MapCacheEntry *entry,
     entry->vaddr_base = vaddr_base;
     entry->paddr_index = address_index;

-    for (i = 0; i < nb_pfn; i += BITS_PER_LONG) {
-        unsigned long word = 0;
-        if ((i + BITS_PER_LONG) > nb_pfn) {
-            j = nb_pfn % BITS_PER_LONG;
-        } else {
-            j = BITS_PER_LONG;
-        }
-        while (j > 0) {
-            word = (word << 1) | !err[i + --j];
+    bitmap_zero(entry->valid_mapping, nb_pfn);
+    for (i = 0; i < nb_pfn; i++) {
+        if (!err[i]) {
+            bitmap_set(entry->valid_mapping, i, 1);
         }
-        entry->valid_mapping[i / BITS_PER_LONG] = word;
     }

     qemu_free(pfns);

From 050a0ddf39cf5555ecf9b22a918b2a2b00bdbe34 Mon Sep 17 00:00:00 2001
From: Anthony PERARD
Date: Thu, 16 Sep 2010 13:57:49 +0100
Subject: Introduce qemu_put_ram_ptr

This function allows a ram_ptr given by qemu_get_ram_ptr to be
unlocked. After a call to qemu_put_ram_ptr, the pointer may be
unmapped from QEMU when used with Xen.

Signed-off-by: Anthony PERARD
Acked-by: Alexander Graf
Signed-off-by: Alexander Graf
---
 cpu-common.h   |  1 +
 exec.c         | 38 +++++++++++++++++++++++++++++++++++---
 trace-events   |  3 +++
 xen-mapcache.c | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index 6410cccda5..151c32c1f2 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -67,6 +67,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr);
 /* Same but slower, to use for migration, where the order of
  * RAMBlocks must not change. */
 void *qemu_safe_ram_ptr(ram_addr_t addr);
+void qemu_put_ram_ptr(void *addr);
 /* This should not be used by devices. */
 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
diff --git a/exec.c b/exec.c
index 8c81cffaa7..a6df2d6139 100644
--- a/exec.c
+++ b/exec.c
@@ -3110,6 +3110,27 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
     return NULL;
 }

+void qemu_put_ram_ptr(void *addr)
+{
+    trace_qemu_put_ram_ptr(addr);
+
+    if (xen_mapcache_enabled()) {
+        RAMBlock *block;
+
+        QLIST_FOREACH(block, &ram_list.blocks, next) {
+            if (addr == block->host) {
+                break;
+            }
+        }
+        if (block && block->host) {
+            xen_unmap_block(block->host, block->length);
+            block->host = NULL;
+        } else {
+            qemu_map_cache_unlock(addr);
+        }
+    }
+}
+
 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
 {
     RAMBlock *block;
@@ -3825,6 +3846,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                 cpu_physical_memory_set_dirty_flags(
                     addr1, (0xff & ~CODE_DIRTY_FLAG));
             }
+            qemu_put_ram_ptr(ptr);
         }
     } else {
         if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
@@ -3852,9 +3874,9 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                 }
             } else {
                 /* RAM case */
-                ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
-                    (addr & ~TARGET_PAGE_MASK);
-                memcpy(buf, ptr, l);
+                ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
+                memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
+                qemu_put_ram_ptr(ptr);
             }
         }
         len -= l;
@@ -3895,6 +3917,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr,
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
+            qemu_put_ram_ptr(ptr);
         }
         len -= l;
         buf += l;
@@ -4036,6 +4059,15 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                 access_len -= l;
             }
         }
+        if (xen_mapcache_enabled()) {
+            uint8_t *buffer1 = buffer;
+            uint8_t *end_buffer = buffer + len;
+
+            while (buffer1 < end_buffer) {
+                qemu_put_ram_ptr(buffer1);
+                buffer1 += TARGET_PAGE_SIZE;
+            }
+        }
         return;
     }
     if (is_write) {
diff --git a/trace-events b/trace-events
index d703347d40..a00b63cefd 100644
--- a/trace-events
+++ b/trace-events
@@ -371,3 +371,6 @@ disable qemu_remap_bucket(uint64_t index) "index %#"PRIx64""
 disable qemu_map_cache_return(void* ptr) "%p"
 disable xen_map_block(uint64_t phys_addr, uint64_t size) "%#"PRIx64", size %#"PRIx64""
 disable xen_unmap_block(void* addr, unsigned long size) "%p, size %#lx"
+
+# exec.c
+disable qemu_put_ram_ptr(void* addr) "%p"
diff --git a/xen-mapcache.c b/xen-mapcache.c
index 2ca18ce3b5..349cc6221d 100644
--- a/xen-mapcache.c
+++ b/xen-mapcache.c
@@ -196,6 +196,39 @@ uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, u
     return mapcache->last_address_vaddr + address_offset;
 }

+void qemu_map_cache_unlock(void *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        return;
+    }
+    if (entry->lock > 0) {
+        entry->lock--;
+    }
+}
+
 ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
 {
     MapCacheRev *reventry;
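
A minimal, self-contained sketch of the address arithmetic the mapcache relies on may help when reading the patches above: qemu_map_cache() splits a guest physical address into a bucket index (the bits above MCACHE_BUCKET_SHIFT) and an offset within the bucket, then consults the bucket's per-page valid_mapping bitmap before handing out a pointer. The snippet below reproduces only that arithmetic in plain C. It is an illustration, not part of any patch: the constants use the x86_64 values from the first patch, XC_PAGE_SHIFT is assumed to be 12 (4 KiB Xen pages), the sample address is arbitrary, and the loop marking every page valid merely stands in for a fully successful xc_map_foreign_bulk() call.

#include <stdint.h>
#include <stdio.h>

#define XC_PAGE_SHIFT       12   /* assumed: 4 KiB Xen pages */
#define MCACHE_BUCKET_SHIFT 20   /* x86_64 value from the patch: 1 MiB buckets */
#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)
#define BITS_PER_LONG       (sizeof(long) * 8)
#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Same test_bit() helper the first patch defines. */
static inline int test_bit(unsigned int bit, const unsigned long *map)
{
    return !!(map[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG)));
}

int main(void)
{
    uint64_t phys_addr = 0x12345678;    /* arbitrary guest physical address */

    /* The split performed at the top of qemu_map_cache(). */
    uint64_t address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    uint64_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    /* One bit per page of the bucket, as in MapCacheEntry.valid_mapping. */
    unsigned long valid_mapping[BITS_TO_LONGS(MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT)] = { 0 };
    unsigned int i;

    /* Stand-in for a fully successful xc_map_foreign_bulk(): every page valid. */
    for (i = 0; i < (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); i++) {
        valid_mapping[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
    }

    printf("bucket index:  %#llx\n", (unsigned long long)address_index);
    printf("bucket offset: %#llx\n", (unsigned long long)address_offset);
    printf("page valid:    %d\n",
           test_bit(address_offset >> XC_PAGE_SHIFT, valid_mapping));
    return 0;
}

With these values the address falls in bucket 0x123 at offset 0x45678, i.e. page 0x45 of the bucket, so the final test_bit() mirrors the check qemu_map_cache() makes before returning a pointer. The two bucket sizes chosen in the first patch (64 KiB on i386, 1 MiB on x86_64) presumably trade the number of xc_map_foreign_bulk() calls against how much virtual address space each mapping pins, and the second patch's MCACHE_MAX_SIZE cap (2 GB / 32 GB) keeps the cache as a whole from overwhelming system memory.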