From 56b15076805a29673c1a90ea9c3ebef25bfcc912 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:50 +0800 Subject: exec.c: replace hwaddr with uint64_t for better understanding Function phys_page_set() and phys_page_set_level() 's argument *nb* stands for number of pages to set instead of hardware address. This would be more proper to use uint64_t instead of hwaddr for its type. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-2-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'exec.c') diff --git a/exec.c b/exec.c index 235d6bc883..8e0c400de8 100644 --- a/exec.c +++ b/exec.c @@ -255,7 +255,7 @@ static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) } static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, - hwaddr *index, hwaddr *nb, uint16_t leaf, + hwaddr *index, uint64_t *nb, uint16_t leaf, int level) { PhysPageEntry *p; @@ -281,7 +281,7 @@ static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, } static void phys_page_set(AddressSpaceDispatch *d, - hwaddr index, hwaddr nb, + hwaddr index, uint64_t nb, uint16_t leaf) { /* Wildly overreserve - it doesn't matter much. */ -- cgit v1.2.3-55-g7522 From c95cfd040078db8017f74fd3a4d6f798385d960c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:52 +0800 Subject: exec.c: get nodes_nb_alloc with one MAX calculation The purpose of these two MAX here is to get the maximum of these three variables: A: map->nodes_nb + nodes B: map->nodes_nb_alloc C: alloc_hint We can write it like MAX(A, B, C). Since the if condition says A > B, this means MAX(A, B, C) = MAX(A, C). This patch just simplify the calculation a bit. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-4-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'exec.c') diff --git a/exec.c b/exec.c index 8e0c400de8..ff3cb3e25b 100644 --- a/exec.c +++ b/exec.c @@ -227,8 +227,7 @@ static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) { static unsigned alloc_hint = 16; if (map->nodes_nb + nodes > map->nodes_nb_alloc) { - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint); - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes); + map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes); map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); alloc_hint = map->nodes_nb_alloc; } -- cgit v1.2.3-55-g7522 From b797ab1a15ba8d2b2fc4ec3e1f24d755f6855d05 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:53 +0800 Subject: exec.c: subpage->sub_section is already initialized to 0 In subpage_init(), we will set subpage->sub_section to PHYS_SECTION_UNASSIGNED by subpage_register. Since PHYS_SECTION_UNASSIGNED is defined to be 0, and we allocate subpage with g_malloc0, this means subpage->sub_section is already initialized to 0. This patch removes the redundant setup for a new subpage and also fix the code style. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-5-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'exec.c') diff --git a/exec.c b/exec.c index ff3cb3e25b..d9827ef840 100644 --- a/exec.c +++ b/exec.c @@ -1491,8 +1491,8 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, #if !defined(CONFIG_USER_ONLY) -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section); +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section); static subpage_t *subpage_init(FlatView *fv, hwaddr base); static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) = @@ -2913,8 +2913,8 @@ static const MemoryRegionOps subpage_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section) +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section) { int idx, eidx; @@ -2937,6 +2937,7 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) { subpage_t *mmio; + /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); mmio->fv = fv; mmio->base = base; @@ -2947,7 +2948,6 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__, mmio, base, TARGET_PAGE_SIZE); #endif - subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED); return mmio; } -- cgit v1.2.3-55-g7522 From 26ca2075babd7775e246b9eb7da75d6de77eb658 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:54 +0800 Subject: exec.c: correct the maximum skip value during compact skip is defined with 6 bits. So the maximum value should be (1 << 6). Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-6-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'exec.c') diff --git a/exec.c b/exec.c index d9827ef840..d1969bb894 100644 --- a/exec.c +++ b/exec.c @@ -324,7 +324,7 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes) assert(valid_ptr < P_L2_SIZE); /* Don't compress if it won't fit in the # of bits we have. */ - if (lp->skip + p[valid_ptr].skip >= (1 << 3)) { + if (lp->skip + p[valid_ptr].skip >= (1 << 6)) { return; } -- cgit v1.2.3-55-g7522 From 526ca2360ea1cd947f74c8c6c38b91b9d6fcfdb5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:55 +0800 Subject: exec.c: add a check between constants to see whether we could skip The maximum level is defined as P_L2_LEVELS and skip is defined with 6 bits, which means if P_L2_LEVELS < (1 << 6), skip never exceeds the boundary. Since this check is between two constants, which leverages compiler to optimize the code based on different configuration. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-7-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'exec.c') diff --git a/exec.c b/exec.c index d1969bb894..b9511be096 100644 --- a/exec.c +++ b/exec.c @@ -324,7 +324,8 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes) assert(valid_ptr < P_L2_SIZE); /* Don't compress if it won't fit in the # of bits we have. */ - if (lp->skip + p[valid_ptr].skip >= (1 << 6)) { + if (P_L2_LEVELS >= (1 << 6) && + lp->skip + p[valid_ptr].skip >= (1 << 6)) { return; } -- cgit v1.2.3-55-g7522 From 72d41eb4b8f923de91e8f06dc20aa86b0a9155fb Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 30 Aug 2019 10:30:56 +0100 Subject: memory: fetch pmem size in get_file_size() Neither stat(2) nor lseek(2) report the size of Linux devdax pmem character device nodes. Commit 314aec4a6e06844937f1677f6cba21981005f389 ("hostmem-file: reject invalid pmem file sizes") added code to hostmem-file.c to fetch the size from sysfs and compare against the user-provided size=NUM parameter: if (backend->size > size) { error_setg(errp, "size property %" PRIu64 " is larger than " "pmem file \"%s\" size %" PRIu64, backend->size, fb->mem_path, size); return; } It turns out that exec.c:qemu_ram_alloc_from_fd() already has an equivalent size check but it skips devdax pmem character devices because lseek(2) returns 0: if (file_size > 0 && file_size < size) { error_setg(errp, "backing store %s size 0x%" PRIx64 " does not match 'size' option 0x" RAM_ADDR_FMT, mem_path, file_size, size); return NULL; } This patch moves the devdax pmem file size code into get_file_size() so that we check the memory size in a single place: qemu_ram_alloc_from_fd(). This simplifies the code and makes it more general. This also fixes the problem that hostmem-file only checks the devdax pmem file size when the pmem=on parameter is given. An unchecked size=NUM parameter can lead to SIGBUS in QEMU so we must always fetch the file size for Linux devdax pmem character device nodes. Signed-off-by: Stefan Hajnoczi Message-Id: <20190830093056.12572-1-stefanha@redhat.com> Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- backends/hostmem-file.c | 22 -------------------- exec.c | 34 ++++++++++++++++++++++++++++++- include/qemu/osdep.h | 13 ------------ util/oslib-posix.c | 54 ------------------------------------------------- util/oslib-win32.c | 6 ------ 5 files changed, 33 insertions(+), 96 deletions(-) (limited to 'exec.c') diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ecc15e3eb0..be64020746 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -58,28 +58,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) return; } - /* - * Verify pmem file size since starting a guest with an incorrect size - * leads to confusing failures inside the guest. - */ - if (fb->is_pmem) { - Error *local_err = NULL; - uint64_t size; - - size = qemu_get_pmem_size(fb->mem_path, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - if (size && backend->size > size) { - error_setg(errp, "size property %" PRIu64 " is larger than " - "pmem file \"%s\" size %" PRIu64, backend->size, - fb->mem_path, size); - return; - } - } - backend->force_prealloc = mem_prealloc; name = host_memory_backend_get_name(backend); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), diff --git a/exec.c b/exec.c index b9511be096..8b998974f8 100644 --- a/exec.c +++ b/exec.c @@ -1791,7 +1791,39 @@ long qemu_maxrampagesize(void) #ifdef CONFIG_POSIX static int64_t get_file_size(int fd) { - int64_t size = lseek(fd, 0, SEEK_END); + int64_t size; +#if defined(__linux__) + struct stat st; + + if (fstat(fd, &st) < 0) { + return -errno; + } + + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + g_autofree char *subsystem_path = NULL; + g_autofree char *subsystem = NULL; + + subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", + major(st.st_rdev), minor(st.st_rdev)); + subsystem = g_file_read_link(subsystem_path, NULL); + + if (subsystem && g_str_has_suffix(subsystem, "/dax")) { + g_autofree char *size_path = NULL; + g_autofree char *size_str = NULL; + + size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", + major(st.st_rdev), minor(st.st_rdev)); + + if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { + return g_ascii_strtoll(size_str, NULL, 0); + } + } + } +#endif /* defined(__linux__) */ + + /* st.st_size may be zero for special files yet lseek(2) works */ + size = lseek(fd, 0, SEEK_END); if (size < 0) { return -errno; } diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index af2b91f0b8..c7d242f476 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -570,19 +570,6 @@ void qemu_set_tty_echo(int fd, bool echo); void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, Error **errp); -/** - * qemu_get_pmem_size: - * @filename: path to a pmem file - * @errp: pointer to a NULL-initialized error object - * - * Determine the size of a persistent memory file. Besides supporting files on - * DAX file systems, this function also supports Linux devdax character - * devices. - * - * Returns: the size or 0 on failure - */ -uint64_t qemu_get_pmem_size(const char *filename, Error **errp); - /** * qemu_get_pid_name: * @pid: pid of a process diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 5fda67dedf..f8693384fc 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -514,60 +514,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } -uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -{ - struct stat st; - - if (stat(filename, &st) < 0) { - error_setg(errp, "unable to stat pmem file \"%s\"", filename); - return 0; - } - -#if defined(__linux__) - /* Special handling for devdax character devices */ - if (S_ISCHR(st.st_mode)) { - char *subsystem_path = NULL; - char *subsystem = NULL; - char *size_path = NULL; - char *size_str = NULL; - uint64_t ret = 0; - - subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", - major(st.st_rdev), minor(st.st_rdev)); - subsystem = g_file_read_link(subsystem_path, NULL); - if (!subsystem) { - error_setg(errp, "unable to read subsystem for pmem file \"%s\"", - filename); - goto devdax_err; - } - - if (!g_str_has_suffix(subsystem, "/dax")) { - error_setg(errp, "pmem file \"%s\" is not a dax device", filename); - goto devdax_err; - } - - size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", - major(st.st_rdev), minor(st.st_rdev)); - if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { - error_setg(errp, "unable to read size for pmem file \"%s\"", - size_path); - goto devdax_err; - } - - ret = g_ascii_strtoull(size_str, NULL, 0); - -devdax_err: - g_free(size_str); - g_free(size_path); - g_free(subsystem); - g_free(subsystem_path); - return ret; - } -#endif /* defined(__linux__) */ - - return st.st_size; -} - char *qemu_get_pid_name(pid_t pid) { char *name = NULL; diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 9583fb4ca4..c62cd4328c 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -562,12 +562,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } -uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -{ - error_setg(errp, "pmem support not available"); - return 0; -} - char *qemu_get_pid_name(pid_t pid) { /* XXX Implement me */ -- cgit v1.2.3-55-g7522