author    Anthony Liguori  2013-12-13 20:10:19 +0100
committer Anthony Liguori  2013-12-13 20:10:20 +0100
commit    5d0e2280cc344f1b939acff431ed2731a9ee7db5
tree      2fe94747c0a0b5be6bb5a62d2bb7a909717a5b84 /exec.c
parent    Merge remote-tracking branch 'bonzini/scsi-next' into staging
parent    pc: use macro for HPET type
Merge remote-tracking branch 'mst/tags/for_anthony' into staging
acpi,pci,pc,memory core fixes

Most notably this includes changes to exec to support full 64 bit
addresses.

This also flushes out patches that got queued during 1.7 freeze.
There are new tests, and a bunch of bug fixes all over the place.
There are also some changes mostly useful for downstreams.

I'm also listing myself as pc co-maintainer. I'm doing this
reluctantly, but this seems to be necessary to make sure patches are
not lost or delayed too much, and posting the MAINTAINERS patch did
not seem to make anyone else volunteer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 11 Dec 2013 10:21:51 AM PST using RSA key ID D28D5469
# gpg: Can't check signature: public key not found

# By Michael S. Tsirkin (14) and others
# Via Michael S. Tsirkin
* mst/tags/for_anthony: (28 commits)
  pc: use macro for HPET type
  hpet: fix build with CONFIG_HPET off
  acpi unit-test: adjust the test data structure for better handling
  acpi unit-test: load and check facs table
  exec: separate sections and nodes per address space
  memory.c: bugfix - ref counting mismatch in memory_region_find
  hpet: enable to entitle more irq pins for hpet
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  pci: fix pci bridge fw path
  ACPI DSDT: Make control method `IQCR` serialized
  acpi: strip compiler info in built-in DSDT
  acpi unit-test: verify signature and checksum
  smbios: Set system manufacturer, product & version by default
  exec: reduce L2_PAGE_SIZE
  exec: make address spaces 64-bit wide
  exec: memory radix tree page level compression
  exec: pass hw address to phys_page_find
  exec: extend skip field to 6 bit, page entry to 32 bit
  exec: replace leaf with skip
  split definitions for exec.c and translate-all.c radix trees
  ...

Message-id: cover.1386786228.git.mst@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
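The core of the exec.c rework below is the skip-based radix tree. The following is a minimal, self-contained sketch of the lookup it enables; the names and loop mirror the PhysPageEntry/phys_page_find code in the diff, but phys_page_lookup is a made-up stand-in, TARGET_PAGE_BITS is pinned to 12 (4 KiB pages) purely for illustration, and unassigned ranges are simplified to a sentinel return rather than the real PHYS_SECTION_UNASSIGNED section.

    #include <stdint.h>

    #define TARGET_PAGE_BITS  12   /* assumption: 4 KiB pages; varies per target */
    #define ADDR_SPACE_BITS   64   /* the full 64 bit physical address space */
    #define P_L2_BITS         9
    #define P_L2_SIZE         (1 << P_L2_BITS)
    /* ((64 - 12 - 1) / 9) + 1 == 6 levels under the assumed page size */
    #define P_L2_LEVELS       (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
    #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

    typedef struct PhysPageEntry {
        uint32_t skip : 6;   /* levels to skip on the way down; 0 marks a leaf */
        uint32_t ptr  : 26;  /* section index (leaf) or node index (non-leaf) */
    } PhysPageEntry;

    typedef PhysPageEntry Node[P_L2_SIZE];

    /* Walk from the root toward addr's page, consuming `skip` levels per
     * step rather than exactly one. Returns a section index, or
     * PHYS_MAP_NODE_NIL for unassigned ranges (simplified from the real
     * code, which returns &sections[PHYS_SECTION_UNASSIGNED]).
     */
    static uint32_t phys_page_lookup(PhysPageEntry lp, Node *nodes, uint64_t addr)
    {
        uint64_t index = addr >> TARGET_PAGE_BITS;
        int i;

        for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
            if (lp.ptr == PHYS_MAP_NODE_NIL) {
                return PHYS_MAP_NODE_NIL;
            }
            lp = nodes[lp.ptr][(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
        }
        return lp.ptr;
    }

With these constants an uncompressed walk takes all six levels; phys_page_compact in the diff folds single-child chains into larger skip values, so a typical sparse map resolves in far fewer steps, which is what makes widening the address space to 64 bits affordable.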
Diffstat (limited to 'exec.c')
-rw-r--r--  exec.c  278
1 file changed, 169 insertions(+), 109 deletions(-)
diff --git a/exec.c b/exec.c
index f4b9ef25f5..00526d18c0 100644
--- a/exec.c
+++ b/exec.c
@@ -51,6 +51,8 @@
#include "exec/memory-internal.h"
+#include "qemu/range.h"
+
//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
@@ -83,20 +85,39 @@ int use_icount;
typedef struct PhysPageEntry PhysPageEntry;
struct PhysPageEntry {
- uint16_t is_leaf : 1;
- /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
- uint16_t ptr : 15;
+ /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
+ uint32_t skip : 6;
+ /* index into phys_sections (!skip) or phys_map_nodes (skip) */
+ uint32_t ptr : 26;
};
-typedef PhysPageEntry Node[L2_SIZE];
+#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
+
+/* Size of the L2 (and L3, etc) page tables. */
+#define ADDR_SPACE_BITS 64
+
+#define P_L2_BITS 9
+#define P_L2_SIZE (1 << P_L2_BITS)
+
+#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
+
+typedef PhysPageEntry Node[P_L2_SIZE];
+
+typedef struct PhysPageMap {
+ unsigned sections_nb;
+ unsigned sections_nb_alloc;
+ unsigned nodes_nb;
+ unsigned nodes_nb_alloc;
+ Node *nodes;
+ MemoryRegionSection *sections;
+} PhysPageMap;
struct AddressSpaceDispatch {
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
PhysPageEntry phys_map;
- Node *nodes;
- MemoryRegionSection *sections;
+ PhysPageMap map;
AddressSpace *as;
};
@@ -113,20 +134,6 @@ typedef struct subpage_t {
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
-typedef struct PhysPageMap {
- unsigned sections_nb;
- unsigned sections_nb_alloc;
- unsigned nodes_nb;
- unsigned nodes_nb_alloc;
- Node *nodes;
- MemoryRegionSection *sections;
-} PhysPageMap;
-
-static PhysPageMap *prev_map;
-static PhysPageMap next_map;
-
-#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
-
static void io_mem_init(void);
static void memory_map_init(void);
@@ -135,63 +142,60 @@ static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY)
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
- if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
- next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
- 16);
- next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
- next_map.nodes_nb + nodes);
- next_map.nodes = g_renew(Node, next_map.nodes,
- next_map.nodes_nb_alloc);
+ if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
+ map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
}
}
-static uint16_t phys_map_node_alloc(void)
+static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
unsigned i;
- uint16_t ret;
+ uint32_t ret;
- ret = next_map.nodes_nb++;
+ ret = map->nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL);
- assert(ret != next_map.nodes_nb_alloc);
- for (i = 0; i < L2_SIZE; ++i) {
- next_map.nodes[ret][i].is_leaf = 0;
- next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+ assert(ret != map->nodes_nb_alloc);
+ for (i = 0; i < P_L2_SIZE; ++i) {
+ map->nodes[ret][i].skip = 1;
+ map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
return ret;
}
-static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
- hwaddr *nb, uint16_t leaf,
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+ hwaddr *index, hwaddr *nb, uint16_t leaf,
int level)
{
PhysPageEntry *p;
int i;
- hwaddr step = (hwaddr)1 << (level * L2_BITS);
+ hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
- if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
- lp->ptr = phys_map_node_alloc();
- p = next_map.nodes[lp->ptr];
+ if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
+ lp->ptr = phys_map_node_alloc(map);
+ p = map->nodes[lp->ptr];
if (level == 0) {
- for (i = 0; i < L2_SIZE; i++) {
- p[i].is_leaf = 1;
+ for (i = 0; i < P_L2_SIZE; i++) {
+ p[i].skip = 0;
p[i].ptr = PHYS_SECTION_UNASSIGNED;
}
}
} else {
- p = next_map.nodes[lp->ptr];
+ p = map->nodes[lp->ptr];
}
- lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
+ lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
- while (*nb && lp < &p[L2_SIZE]) {
+ while (*nb && lp < &p[P_L2_SIZE]) {
if ((*index & (step - 1)) == 0 && *nb >= step) {
- lp->is_leaf = true;
+ lp->skip = 0;
lp->ptr = leaf;
*index += step;
*nb -= step;
} else {
- phys_page_set_level(lp, index, nb, leaf, level - 1);
+ phys_page_set_level(map, lp, index, nb, leaf, level - 1);
}
++lp;
}
@@ -202,25 +206,95 @@ static void phys_page_set(AddressSpaceDispatch *d,
uint16_t leaf)
{
/* Wildly overreserve - it doesn't matter much. */
- phys_map_node_reserve(3 * P_L2_LEVELS);
+ phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
+
- phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+ phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+}
+
+/* Compact a non leaf page entry. Simply detect that the entry has a single child,
+ * and update our entry so we can skip it and go directly to the destination.
+ */
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
+{
+ unsigned valid_ptr = P_L2_SIZE;
+ int valid = 0;
+ PhysPageEntry *p;
+ int i;
+
+ if (lp->ptr == PHYS_MAP_NODE_NIL) {
+ return;
+ }
+
+ p = nodes[lp->ptr];
+ for (i = 0; i < P_L2_SIZE; i++) {
+ if (p[i].ptr == PHYS_MAP_NODE_NIL) {
+ continue;
+ }
+
+ valid_ptr = i;
+ valid++;
+ if (p[i].skip) {
+ phys_page_compact(&p[i], nodes, compacted);
+ }
+ }
+
+ /* We can only compress if there's only one child. */
+ if (valid != 1) {
+ return;
+ }
+
+ assert(valid_ptr < P_L2_SIZE);
+
+ /* Don't compress if it won't fit in the # of bits we have. */
+ if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
+ return;
+ }
+
+ lp->ptr = p[valid_ptr].ptr;
+ if (!p[valid_ptr].skip) {
+ /* If our only child is a leaf, make this a leaf. */
+ /* By design, we should have made this node a leaf to begin with so we
+ * should never reach here.
+ * But since it's so simple to handle this, let's do it just in case we
+ * change this rule.
+ */
+ lp->skip = 0;
+ } else {
+ lp->skip += p[valid_ptr].skip;
+ }
+}
+
+static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
+{
+ DECLARE_BITMAP(compacted, nodes_nb);
+
+ if (d->phys_map.skip) {
+ phys_page_compact(&d->phys_map, d->map.nodes, compacted);
+ }
}
-static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
+static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
Node *nodes, MemoryRegionSection *sections)
{
PhysPageEntry *p;
+ hwaddr index = addr >> TARGET_PAGE_BITS;
int i;
- for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
+ for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
if (lp.ptr == PHYS_MAP_NODE_NIL) {
return &sections[PHYS_SECTION_UNASSIGNED];
}
p = nodes[lp.ptr];
- lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
+ lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
+ }
+
+ if (sections[lp.ptr].size.hi ||
+ range_covers_byte(sections[lp.ptr].offset_within_address_space,
+ sections[lp.ptr].size.lo, addr)) {
+ return &sections[lp.ptr];
+ } else {
+ return &sections[PHYS_SECTION_UNASSIGNED];
}
- return &sections[lp.ptr];
}
bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -236,11 +310,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
MemoryRegionSection *section;
subpage_t *subpage;
- section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
- d->nodes, d->sections);
+ section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
- section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+ section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
}
return section;
}
@@ -708,7 +781,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
iotlb |= PHYS_SECTION_ROM;
}
} else {
- iotlb = section - address_space_memory.dispatch->sections;
+ iotlb = section - address_space_memory.dispatch->map.sections;
iotlb += xlat;
}
@@ -747,23 +820,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
phys_mem_alloc = alloc;
}
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(PhysPageMap *map,
+ MemoryRegionSection *section)
{
/* The physical section number is ORed with a page-aligned
* pointer to produce the iotlb entries. Thus it should
* never overflow into the page-aligned value.
*/
- assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+ assert(map->sections_nb < TARGET_PAGE_SIZE);
- if (next_map.sections_nb == next_map.sections_nb_alloc) {
- next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
- 16);
- next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
- next_map.sections_nb_alloc);
+ if (map->sections_nb == map->sections_nb_alloc) {
+ map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+ map->sections = g_renew(MemoryRegionSection, map->sections,
+ map->sections_nb_alloc);
}
- next_map.sections[next_map.sections_nb] = *section;
+ map->sections[map->sections_nb] = *section;
memory_region_ref(section->mr);
- return next_map.sections_nb++;
+ return map->sections_nb++;
}
static void phys_section_destroy(MemoryRegion *mr)
@@ -785,7 +858,6 @@ static void phys_sections_free(PhysPageMap *map)
}
g_free(map->sections);
g_free(map->nodes);
- g_free(map);
}
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@@ -793,8 +865,8 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
subpage_t *subpage;
hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK;
- MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
- next_map.nodes, next_map.sections);
+ MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
+ d->map.nodes, d->map.sections);
MemoryRegionSection subsection = {
.offset_within_address_space = base,
.size = int128_make64(TARGET_PAGE_SIZE),
@@ -807,13 +879,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
subpage = subpage_init(d->as, base);
subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
- phys_section_add(&subsection));
+ phys_section_add(&d->map, &subsection));
} else {
subpage = container_of(existing->mr, subpage_t, iomem);
}
start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
end = start + int128_get64(section->size) - 1;
- subpage_register(subpage, start, end, phys_section_add(section));
+ subpage_register(subpage, start, end,
+ phys_section_add(&d->map, section));
}
@@ -821,7 +894,7 @@ static void register_multipage(AddressSpaceDispatch *d,
MemoryRegionSection *section)
{
hwaddr start_addr = section->offset_within_address_space;
- uint16_t section_index = phys_section_add(section);
+ uint16_t section_index = phys_section_add(&d->map, section);
uint64_t num_pages = int128_get64(int128_rshift(section->size,
TARGET_PAGE_BITS));
@@ -1640,7 +1713,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
return mmio;
}
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
MemoryRegionSection section = {
.mr = mr,
@@ -1649,12 +1722,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
.size = int128_2_64(),
};
- return phys_section_add(&section);
+ return phys_section_add(map, &section);
}
MemoryRegion *iotlb_to_region(hwaddr index)
{
- return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
+ return address_space_memory.dispatch->map.sections[
+ index & ~TARGET_PAGE_MASK].mr;
}
static void io_mem_init(void)
@@ -1671,9 +1745,19 @@ static void io_mem_init(void)
static void mem_begin(MemoryListener *listener)
{
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
- AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+ AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
+ uint16_t n;
- d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
+ n = dummy_section(&d->map, &io_mem_unassigned);
+ assert(n == PHYS_SECTION_UNASSIGNED);
+ n = dummy_section(&d->map, &io_mem_notdirty);
+ assert(n == PHYS_SECTION_NOTDIRTY);
+ n = dummy_section(&d->map, &io_mem_rom);
+ assert(n == PHYS_SECTION_ROM);
+ n = dummy_section(&d->map, &io_mem_watch);
+ assert(n == PHYS_SECTION_WATCH);
+
+ d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
d->as = as;
as->next_dispatch = d;
}
@@ -1684,37 +1768,14 @@ static void mem_commit(MemoryListener *listener)
AddressSpaceDispatch *cur = as->dispatch;
AddressSpaceDispatch *next = as->next_dispatch;
- next->nodes = next_map.nodes;
- next->sections = next_map.sections;
+ phys_page_compact_all(next, next->map.nodes_nb);
as->dispatch = next;
- g_free(cur);
-}
-
-static void core_begin(MemoryListener *listener)
-{
- uint16_t n;
-
- prev_map = g_new(PhysPageMap, 1);
- *prev_map = next_map;
- memset(&next_map, 0, sizeof(next_map));
- n = dummy_section(&io_mem_unassigned);
- assert(n == PHYS_SECTION_UNASSIGNED);
- n = dummy_section(&io_mem_notdirty);
- assert(n == PHYS_SECTION_NOTDIRTY);
- n = dummy_section(&io_mem_rom);
- assert(n == PHYS_SECTION_ROM);
- n = dummy_section(&io_mem_watch);
- assert(n == PHYS_SECTION_WATCH);
-}
-
-/* This listener's commit run after the other AddressSpaceDispatch listeners'.
- * All AddressSpaceDispatch instances have switched to the next map.
- */
-static void core_commit(MemoryListener *listener)
-{
- phys_sections_free(prev_map);
+ if (cur) {
+ phys_sections_free(&cur->map);
+ g_free(cur);
+ }
}
static void tcg_commit(MemoryListener *listener)
@@ -1742,8 +1803,6 @@ static void core_log_global_stop(MemoryListener *listener)
}
static MemoryListener core_memory_listener = {
- .begin = core_begin,
- .commit = core_commit,
.log_global_start = core_log_global_start,
.log_global_stop = core_log_global_stop,
.priority = 1,
@@ -1778,7 +1837,8 @@ void address_space_destroy_dispatch(AddressSpace *as)
static void memory_map_init(void)
{
system_memory = g_malloc(sizeof(*system_memory));
- memory_region_init(system_memory, NULL, "system", INT64_MAX);
+
+ memory_region_init(system_memory, NULL, "system", UINT64_MAX);
address_space_init(&address_space_memory, system_memory, "memory");
system_io = g_malloc(sizeof(*system_io));