From 34506b30e4210b417aa38d1518aa2c53fb7cf39b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jul 2016 20:58:41 +0300 Subject: util/qht: Document memory ordering assumptions It is naturally expected that some memory ordering should be provided around qht_insert() and qht_lookup(). Document these assumptions in the header file and put some comments in the source to denote how that memory ordering requirements are fulfilled. Signed-off-by: Paolo Bonzini [Sergey Fedorov: commit title and message provided; comment on qht_remove() elided] Signed-off-by: Sergey Fedorov Message-Id: <20160715175852.30749-2-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- include/qemu/qht.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/qemu/qht.h b/include/qemu/qht.h index 70bfc68b8d..311139b85a 100644 --- a/include/qemu/qht.h +++ b/include/qemu/qht.h @@ -69,6 +69,9 @@ void qht_destroy(struct qht *ht); * Attempting to insert a NULL @p is a bug. * Inserting the same pointer @p with different @hash values is a bug. * + * In case of successful operation, smp_wmb() is implied before the pointer is + * inserted into the hash table. + * * Returns true on sucess. * Returns false if the @p-@hash pair already exists in the hash table. */ @@ -83,6 +86,8 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash); * * Needs to be called under an RCU read-critical section. * + * smp_read_barrier_depends() is implied before the call to @func. + * * The user-provided @func compares pointers in QHT against @userp. * If the function returns true, a match has been found. * -- cgit v1.2.3-55-g7522 From 056b68af773b31fa98fe4538f6424c0079b61415 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 20 Jul 2016 11:54:03 +0200 Subject: fix qemu exit on memory hotplug when allocation fails at prealloc time When adding hostmem backend at runtime, QEMU might exit with error: "os_mem_prealloc: Insufficient free host memory pages available to allocate guest RAM" It happens due to os_mem_prealloc() not handling errors gracefully. Fix it by passing errp argument so that os_mem_prealloc() could report error to callers and undo performed allocation when os_mem_prealloc() fails. Signed-off-by: Igor Mammedov Message-Id: <1469008443-72059-1-git-send-email-imammedo@redhat.com> Reviewed-by: Markus Armbruster Signed-off-by: Paolo Bonzini --- backends/hostmem.c | 18 ++++++++++++++---- exec.c | 10 ++++++++-- include/qemu/osdep.h | 2 +- util/oslib-posix.c | 26 +++++++++++++------------- util/oslib-win32.c | 2 +- 5 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/backends/hostmem.c b/backends/hostmem.c index ac802570a8..b7a208d0da 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -203,6 +203,7 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) static void host_memory_backend_set_prealloc(Object *obj, bool value, Error **errp) { + Error *local_err = NULL; HostMemoryBackend *backend = MEMORY_BACKEND(obj); if (backend->force_prealloc) { @@ -223,7 +224,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); - os_mem_prealloc(fd, ptr, sz); + os_mem_prealloc(fd, ptr, sz, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } backend->prealloc = true; } } @@ -286,8 +291,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (bc->alloc) { bc->alloc(backend, &local_err); if (local_err) { - error_propagate(errp, local_err); - return; + goto out; } ptr = memory_region_get_ram_ptr(&backend->mr); @@ -343,9 +347,15 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) * specified NUMA policy in place. */ if (backend->prealloc) { - os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); + os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, + &local_err); + if (local_err) { + goto out; + } } } +out: + error_propagate(errp, local_err); } static bool diff --git a/exec.c b/exec.c index 50e3ee237c..8ffde75983 100644 --- a/exec.c +++ b/exec.c @@ -1226,7 +1226,7 @@ static void *file_ram_alloc(RAMBlock *block, char *filename; char *sanitized_name; char *c; - void *area; + void *area = MAP_FAILED; int fd = -1; int64_t page_size; @@ -1314,13 +1314,19 @@ static void *file_ram_alloc(RAMBlock *block, } if (mem_prealloc) { - os_mem_prealloc(fd, area, memory); + os_mem_prealloc(fd, area, memory, errp); + if (errp && *errp) { + goto error; + } } block->fd = fd; return area; error: + if (area != MAP_FAILED) { + qemu_ram_munmap(area, memory); + } if (unlink_on_error) { unlink(path); } diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index fbb875959f..d7c111d169 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -379,7 +379,7 @@ unsigned long qemu_getauxval(unsigned long type); void qemu_set_tty_echo(int fd, bool echo); -void os_mem_prealloc(int fd, char *area, size_t sz); +void os_mem_prealloc(int fd, char *area, size_t sz, Error **errp); int qemu_read_password(char *buf, int buf_size); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 6d70d9a706..f2d4e9e592 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -318,7 +318,7 @@ static void sigbus_handler(int signal) siglongjmp(sigjump, 1); } -void os_mem_prealloc(int fd, char *area, size_t memory) +void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) { int ret; struct sigaction act, oldact; @@ -330,8 +330,9 @@ void os_mem_prealloc(int fd, char *area, size_t memory) ret = sigaction(SIGBUS, &act, &oldact); if (ret) { - perror("os_mem_prealloc: failed to install signal handler"); - exit(1); + error_setg_errno(errp, errno, + "os_mem_prealloc: failed to install signal handler"); + return; } /* unblock SIGBUS */ @@ -340,9 +341,8 @@ void os_mem_prealloc(int fd, char *area, size_t memory) pthread_sigmask(SIG_UNBLOCK, &set, &oldset); if (sigsetjmp(sigjump, 1)) { - fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " - "pages available to allocate guest RAM\n"); - exit(1); + error_setg(errp, "os_mem_prealloc: Insufficient free host memory " + "pages available to allocate guest RAM\n"); } else { int i; size_t hpagesize = qemu_fd_getpagesize(fd); @@ -352,15 +352,15 @@ void os_mem_prealloc(int fd, char *area, size_t memory) for (i = 0; i < numpages; i++) { memset(area + (hpagesize * i), 0, 1); } + } - ret = sigaction(SIGBUS, &oldact, NULL); - if (ret) { - perror("os_mem_prealloc: failed to reinstall signal handler"); - exit(1); - } - - pthread_sigmask(SIG_SETMASK, &oldset, NULL); + ret = sigaction(SIGBUS, &oldact, NULL); + if (ret) { + /* Terminate QEMU since it can't recover from error */ + perror("os_mem_prealloc: failed to reinstall signal handler"); + exit(1); } + pthread_sigmask(SIG_SETMASK, &oldset, NULL); } diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 6debc2b8a6..4c1dcf1e66 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -539,7 +539,7 @@ int getpagesize(void) return system_info.dwPageSize; } -void os_mem_prealloc(int fd, char *area, size_t memory) +void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) { int i; size_t pagesize = getpagesize(); -- cgit v1.2.3-55-g7522 From 00432b69538559285ca55677c1002cad556aea50 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 21 Jul 2016 18:33:32 +0800 Subject: util: drop inet_nonblocking_connect() It is never used; all nonblocking connect now goes through socket_connect(), which calls inet_connect_addr(). Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Paolo Bonzini Signed-off-by: Cao jin Message-Id: <1469097213-26441-2-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- include/qemu/sockets.h | 3 --- util/qemu-sockets.c | 30 ------------------------------ 2 files changed, 33 deletions(-) (limited to 'include') diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 5fe01fbc6c..2cbe6432cd 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -36,9 +36,6 @@ InetSocketAddress *inet_parse(const char *str, Error **errp); int inet_listen(const char *str, char *ostr, int olen, int socktype, int port_offset, Error **errp); int inet_connect(const char *str, Error **errp); -int inet_nonblocking_connect(const char *str, - NonBlockingConnectHandler *callback, - void *opaque, Error **errp); NetworkAddressFamily inet_netfamily(int family); diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 777af49d53..2e0570b2b7 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -674,36 +674,6 @@ int inet_connect(const char *str, Error **errp) return sock; } -/** - * Create a non-blocking socket and connect it to an address. - * Calls the callback function with fd in case of success or -1 in case of - * error. - * - * @str: address string - * @callback: callback function that is called when connect completes, - * cannot be NULL. - * @opaque: opaque for callback function - * @errp: set in case of an error - * - * Returns: -1 on immediate error, file descriptor on success. - **/ -int inet_nonblocking_connect(const char *str, - NonBlockingConnectHandler *callback, - void *opaque, Error **errp) -{ - int sock = -1; - InetSocketAddress *addr; - - g_assert(callback != NULL); - - addr = inet_parse(str, errp); - if (addr != NULL) { - sock = inet_connect_saddr(addr, errp, callback, opaque); - qapi_free_InetSocketAddress(addr); - } - return sock; -} - #ifndef _WIN32 static int unix_listen_saddr(UnixSocketAddress *saddr, -- cgit v1.2.3-55-g7522 From f8ea7a8656a807a8a3e4e2389059679716a75baa Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 21 Jul 2016 18:33:33 +0800 Subject: util: drop unix_nonblocking_connect() It is never used; all nonblocking connect now goes through socket_connect(), which calls unix_connect_addr(). Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Paolo Bonzini Signed-off-by: Cao jin Message-Id: <1469097213-26441-3-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- include/qemu/sockets.h | 3 --- util/qemu-sockets.c | 16 ---------------- 2 files changed, 19 deletions(-) (limited to 'include') diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 2cbe6432cd..28a28c0717 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -41,9 +41,6 @@ NetworkAddressFamily inet_netfamily(int family); int unix_listen(const char *path, char *ostr, int olen, Error **errp); int unix_connect(const char *path, Error **errp); -int unix_nonblocking_connect(const char *str, - NonBlockingConnectHandler *callback, - void *opaque, Error **errp); SocketAddress *socket_parse(const char *str, Error **errp); int socket_connect(SocketAddress *addr, Error **errp, diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 2e0570b2b7..58f9a2cce0 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -863,22 +863,6 @@ int unix_connect(const char *path, Error **errp) } -int unix_nonblocking_connect(const char *path, - NonBlockingConnectHandler *callback, - void *opaque, Error **errp) -{ - UnixSocketAddress *saddr; - int sock = -1; - - g_assert(callback != NULL); - - saddr = g_new0(UnixSocketAddress, 1); - saddr->path = g_strdup(path); - sock = unix_connect_saddr(saddr, errp, callback, opaque); - qapi_free_UnixSocketAddress(saddr); - return sock; -} - SocketAddress *socket_parse(const char *str, Error **errp) { SocketAddress *addr; -- cgit v1.2.3-55-g7522 From 767db021bc45affa7a7c20ea21730d2867a0c37f Mon Sep 17 00:00:00 2001 From: Cao jin Date: Mon, 25 Jul 2016 21:02:51 +0800 Subject: util: Drop inet_listen() Since commit e65c67e4, inet_listen() is not used anymore, and all inet listen operation goes through QIOChannel. Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Paolo Bonzini Cc: Eric Blake Signed-off-by: Cao jin Message-Id: <1469451771-1173-3-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- include/qemu/sockets.h | 2 -- util/qemu-sockets.c | 28 ---------------------------- 2 files changed, 30 deletions(-) (limited to 'include') diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 28a28c0717..9eb24707df 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -33,8 +33,6 @@ int socket_set_fast_reuse(int fd); typedef void NonBlockingConnectHandler(int fd, Error *err, void *opaque); InetSocketAddress *inet_parse(const char *str, Error **errp); -int inet_listen(const char *str, char *ostr, int olen, - int socktype, int port_offset, Error **errp); int inet_connect(const char *str, Error **errp); NetworkAddressFamily inet_netfamily(int family); diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 58f9a2cce0..2aed799e97 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -624,34 +624,6 @@ fail: return NULL; } -int inet_listen(const char *str, char *ostr, int olen, - int socktype, int port_offset, Error **errp) -{ - char *optstr; - int sock = -1; - InetSocketAddress *addr; - - addr = inet_parse(str, errp); - if (addr != NULL) { - sock = inet_listen_saddr(addr, port_offset, true, errp); - if (sock != -1 && ostr) { - optstr = strchr(str, ','); - if (addr->ipv6) { - snprintf(ostr, olen, "[%s]:%s%s", - addr->host, - addr->port, - optstr ? optstr : ""); - } else { - snprintf(ostr, olen, "%s:%s%s", - addr->host, - addr->port, - optstr ? optstr : ""); - } - } - qapi_free_InetSocketAddress(addr); - } - return sock; -} /** * Create a blocking socket and connect it to an address. -- cgit v1.2.3-55-g7522 From 7423f417827146f956df820f172d0bf80a489495 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 21 Jul 2016 13:34:46 -0600 Subject: nbd: Limit nbdflags to 16 bits Rather than asserting that nbdflags is within range, just give it the correct type to begin with :) nbdflags corresponds to the per-export portion of NBD Protocol "transmission flags", which is 16 bits in response to NBD_OPT_EXPORT_NAME and NBD_OPT_GO. Furthermore, upstream NBD has never passed the global flags to the kernel via ioctl(NBD_SET_FLAGS) (the ioctl was first introduced in NBD 2.9.22; then a latent bug in NBD 3.1 actually tried to OR the global flags with the transmission flags, with the disaster that the addition of NBD_FLAG_NO_ZEROES in 3.9 caused all earlier NBD 3.x clients to treat every export as read-only; NBD 3.10 and later intentionally clip things to 16 bits to pass only transmission flags). Qemu should follow suit, since the current two global flags (NBD_FLAG_FIXED_NEWSTYLE and NBD_FLAG_NO_ZEROES) have no impact on the kernel's behavior during transmission. CC: qemu-stable@nongnu.org Signed-off-by: Eric Blake Message-Id: <1469129688-22848-3-git-send-email-eblake@redhat.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.h | 2 +- include/block/nbd.h | 6 +++--- nbd/client.c | 28 +++++++++++++++------------- nbd/server.c | 10 ++++------ qemu-nbd.c | 4 ++-- 5 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/block/nbd-client.h b/block/nbd-client.h index fa9817b7d7..044aca4530 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -20,7 +20,7 @@ typedef struct NbdClientSession { QIOChannelSocket *sioc; /* The master data channel */ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ - uint32_t nbdflags; + uint16_t nbdflags; off_t size; CoMutex send_mutex; diff --git a/include/block/nbd.h b/include/block/nbd.h index cb91820f38..1897557a9b 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -90,11 +90,11 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc, size_t niov, size_t length, bool do_read); -int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, +int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, off_t *size, Error **errp); -int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size); +int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size); ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request); ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply); int nbd_client(int fd); @@ -104,7 +104,7 @@ typedef struct NBDExport NBDExport; typedef struct NBDClient NBDClient; NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, - uint32_t nbdflags, void (*close)(NBDExport *), + uint16_t nbdflags, void (*close)(NBDExport *), Error **errp); void nbd_export_close(NBDExport *exp); void nbd_export_get(NBDExport *exp); diff --git a/nbd/client.c b/nbd/client.c index 78a7195c45..a92f1e2275 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -408,7 +408,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, } -int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, +int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, off_t *size, Error **errp) @@ -468,7 +468,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, uint32_t opt; uint32_t namesize; uint16_t globalflags; - uint16_t exportflags; bool fixedNewStyle = false; if (read_sync(ioc, &globalflags, sizeof(globalflags)) != @@ -477,7 +476,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, goto fail; } globalflags = be16_to_cpu(globalflags); - *flags = globalflags << 16; TRACE("Global flags are %" PRIx32, globalflags); if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) { fixedNewStyle = true; @@ -545,17 +543,15 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, goto fail; } *size = be64_to_cpu(s); - TRACE("Size is %" PRIu64, *size); - if (read_sync(ioc, &exportflags, sizeof(exportflags)) != - sizeof(exportflags)) { + if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) { error_setg(errp, "Failed to read export flags"); goto fail; } - exportflags = be16_to_cpu(exportflags); - *flags |= exportflags; - TRACE("Export flags are %" PRIx16, exportflags); + be16_to_cpus(flags); } else if (magic == NBD_CLIENT_MAGIC) { + uint32_t oldflags; + if (name) { error_setg(errp, "Server does not support export names"); goto fail; @@ -572,16 +568,22 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, *size = be64_to_cpu(s); TRACE("Size is %" PRIu64, *size); - if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) { + if (read_sync(ioc, &oldflags, sizeof(oldflags)) != sizeof(oldflags)) { error_setg(errp, "Failed to read export flags"); goto fail; } - *flags = be32_to_cpu(*flags); + be32_to_cpus(&oldflags); + if (oldflags & ~0xffff) { + error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags); + goto fail; + } + *flags = oldflags; } else { error_setg(errp, "Bad magic received"); goto fail; } + TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); if (read_sync(ioc, &buf, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; @@ -593,7 +595,7 @@ fail: } #ifdef __linux__ -int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size) +int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size) { unsigned long sectors = size / BDRV_SECTOR_SIZE; if (size / BDRV_SECTOR_SIZE != sectors) { @@ -689,7 +691,7 @@ int nbd_disconnect(int fd) } #else -int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size) +int nbd_init(int fd, QIOChannelSocket *ioc, uint16_t flags, off_t size) { return -ENOTSUP; } diff --git a/nbd/server.c b/nbd/server.c index 3c1e2b336b..80fbb4da1d 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -63,7 +63,7 @@ struct NBDExport { char *name; off_t dev_offset; off_t size; - uint32_t nbdflags; + uint16_t nbdflags; QTAILQ_HEAD(, NBDClient) clients; QTAILQ_ENTRY(NBDExport) next; @@ -544,8 +544,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) NBDClient *client = data->client; char buf[8 + 8 + 8 + 128]; int rc; - const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); bool oldStyle; /* Old style negotiation header without options @@ -575,7 +575,6 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) oldStyle = client->exp != NULL && !client->tlscreds; if (oldStyle) { - assert ((client->exp->nbdflags & ~65535) == 0); TRACE("advertising size %" PRIu64 " and flags %x", client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 8, NBD_CLIENT_MAGIC); @@ -606,7 +605,6 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) goto fail; } - assert ((client->exp->nbdflags & ~65535) == 0); TRACE("advertising size %" PRIu64 " and flags %x", client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 18, client->exp->size); @@ -810,7 +808,7 @@ static void nbd_eject_notifier(Notifier *n, void *data) } NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, - uint32_t nbdflags, void (*close)(NBDExport *), + uint16_t nbdflags, void (*close)(NBDExport *), Error **errp) { NBDExport *exp = g_malloc0(sizeof(NBDExport)); diff --git a/qemu-nbd.c b/qemu-nbd.c index 321f02bd15..e3571c2025 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -251,7 +251,7 @@ static void *nbd_client_thread(void *arg) { char *device = arg; off_t size; - uint32_t nbdflags; + uint16_t nbdflags; QIOChannelSocket *sioc; int fd; int ret; @@ -465,7 +465,7 @@ int main(int argc, char **argv) BlockBackend *blk; BlockDriverState *bs; off_t dev_offset = 0; - uint32_t nbdflags = 0; + uint16_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; const char *port = NULL; -- cgit v1.2.3-55-g7522 From e9fd416e66539ad43bbab018f346cb164136c099 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 21 Jul 2016 13:34:47 -0600 Subject: osdep: Document differences in rounding macros Make it obvious which macros are safe in which situations. Useful since QEMU_ALIGN_UP and ROUND_UP both purport to do the same thing, but differ on whether the alignment must be a power of 2. Signed-off-by: Eric Blake Message-Id: <1469129688-22848-4-git-send-email-eblake@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/osdep.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index d7c111d169..9e9fa61546 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -158,7 +158,8 @@ extern int daemon(int, int); /* Round number down to multiple */ #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) -/* Round number up to multiple */ +/* Round number up to multiple. Safe when m is not a power of 2 (see + * ROUND_UP for a faster version when a power of 2 is guaranteed) */ #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) /* Check if n is a multiple of m */ @@ -175,6 +176,9 @@ extern int daemon(int, int); /* Check if pointer p is n-bytes aligned */ #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) +/* Round number up to multiple. Requires that d be a power of 2 (see + * QEMU_ALIGN_UP for a safer but slower version on arbitrary + * numbers) */ #ifndef ROUND_UP #define ROUND_UP(n,d) (((n) + (d) - 1) & -(d)) #endif -- cgit v1.2.3-55-g7522 From b8d0a9804d0bd43c9b662a6917ae9cd514a54dff Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 21 Jul 2016 13:34:48 -0600 Subject: block: Cater to iscsi with non-power-of-2 discard Dell Equallogic iSCSI SANs have a very unusual advertised geometry: $ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0 wsnz:0 maximum compare and write length:1 optimal transfer length granularity:0 maximum transfer length:0 optimal transfer length:0 maximum prefetch xdread xdwrite transfer length:0 maximum unmap lba count:30720 maximum unmap block descriptor count:2 optimal unmap granularity:30720 ugavalid:1 unmap granularity alignment:0 maximum write same length:30720 which says that both the maximum and the optimal discard size is 15M. It is not immediately apparent if the device allows discard requests not aligned to the optimal size, nor if it allows discards at a finer granularity than the optimal size. I tried to find details in the SCSI Commands Reference Manual Rev. A on what valid values of maximum and optimal sizes are permitted, but while that document mentions a "Block Limits VPD Page", I couldn't actually find documentation of that page or what values it would have, or if a SCSI device has an advertisement of its minimal unmap granularity. So it is not obvious to me whether the Dell Equallogic device is compliance with the SCSI specification. Fortunately, it is easy enough to support non-power-of-2 sizing, even if it means we are less efficient than truly possible when targetting that device (for example, it means that we refuse to unmap anything that is not a multiple of 15M and aligned to a 15M boundary, even if the device truly does support a smaller granularity where unmapping actually works). Reported-by: Peter Lieven Signed-off-by: Eric Blake Message-Id: <1469129688-22848-5-git-send-email-eblake@redhat.com> Acked-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- block/io.c | 15 +++++++++------ include/block/block_int.h | 37 ++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 7323f0fb7b..d5493ba349 100644 --- a/block/io.c +++ b/block/io.c @@ -1180,10 +1180,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); - assert(is_power_of_2(alignment)); - head = offset & (alignment - 1); - tail = (offset + count) & (alignment - 1); - max_write_zeroes &= ~(alignment - 1); + assert(alignment % bs->bl.request_alignment == 0); + head = offset % alignment; + tail = (offset + count) % alignment; + max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment); + assert(max_write_zeroes >= bs->bl.request_alignment); while (count > 0 && !ret) { int num = count; @@ -2429,9 +2430,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, /* Discard is advisory, so ignore any unaligned head or tail */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); - assert(is_power_of_2(align)); - head = MIN(count, -offset & (align - 1)); + assert(align % bs->bl.request_alignment == 0); + head = offset % align; if (head) { + head = MIN(count, align - head); count -= head; offset += head; } @@ -2449,6 +2451,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), align); + assert(max_pdiscard); while (count > 0) { int ret; diff --git a/include/block/block_int.h b/include/block/block_int.h index 1fe0fd9ff6..47665be81e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -330,36 +330,39 @@ typedef struct BlockLimits { * otherwise. */ uint32_t request_alignment; - /* maximum number of bytes that can be discarded at once (since it - * is signed, it must be < 2G, if set), should be multiple of + /* Maximum number of bytes that can be discarded at once (since it + * is signed, it must be < 2G, if set). Must be multiple of * pdiscard_alignment, but need not be power of 2. May be 0 if no * inherent 32-bit limit */ int32_t max_pdiscard; - /* optimal alignment for discard requests in bytes, must be power - * of 2, less than max_pdiscard if that is set, and multiple of - * bl.request_alignment. May be 0 if bl.request_alignment is good - * enough */ + /* Optimal alignment for discard requests in bytes. A power of 2 + * is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pdiscard if + * that is set. May be 0 if bl.request_alignment is good enough */ uint32_t pdiscard_alignment; - /* maximum number of bytes that can zeroized at once (since it is - * signed, it must be < 2G, if set), should be multiple of + /* Maximum number of bytes that can zeroized at once (since it is + * signed, it must be < 2G, if set). Must be multiple of * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ int32_t max_pwrite_zeroes; - /* optimal alignment for write zeroes requests in bytes, must be - * power of 2, less than max_pwrite_zeroes if that is set, and - * multiple of bl.request_alignment. May be 0 if - * bl.request_alignment is good enough */ + /* Optimal alignment for write zeroes requests in bytes. A power + * of 2 is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pwrite_zeroes + * if that is set. May be 0 if bl.request_alignment is good + * enough */ uint32_t pwrite_zeroes_alignment; - /* optimal transfer length in bytes (must be power of 2, and - * multiple of bl.request_alignment), or 0 if no preferred size */ + /* Optimal transfer length in bytes. A power of 2 is best but not + * mandatory. Must be a multiple of bl.request_alignment, or 0 if + * no preferred size */ uint32_t opt_transfer; - /* maximal transfer length in bytes (need not be power of 2, but - * should be multiple of opt_transfer), or 0 for no 32-bit limit. - * For now, anything larger than INT_MAX is clamped down. */ + /* Maximal transfer length in bytes. Need not be power of 2, but + * must be multiple of opt_transfer and bl.request_alignment, or 0 + * for no 32-bit limit. For now, anything larger than INT_MAX is + * clamped down. */ uint32_t max_transfer; /* memory alignment, in bytes so that no bounce buffer is needed */ -- cgit v1.2.3-55-g7522 From 7298d4fd515c190b1b6c1266735f6212300313ae Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 29 Jul 2016 15:55:42 +0200 Subject: apic: fix broken migration for kvm-apic commit f6e98444 (apic: Use apic_id as apic's migration instance_id) breaks migration when in kernel irqchip is used for 2.6 and older machine types. It applies compat property only for userspace 'apic' type instead of applying it to all apic types inherited from 'apic-common' type as it was supposed to do. Fix it by setting compat property 'legacy-instance-id' for 'apic-common' type which affects inherited types (i.e. not only 'apic' but also 'kvm-apic' types) Signed-off-by: Igor Mammedov Message-Id: <1469800542-11402-1-git-send-email-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c87c5c1eec..74c175c1e5 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -388,7 +388,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .value = "off",\ },\ {\ - .driver = "apic",\ + .driver = "apic-common",\ .property = "legacy-instance-id",\ .value = "on",\ }, -- cgit v1.2.3-55-g7522 From 20fd4b7b6d9282fe0cb83601f1821f31bd257458 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 1 Aug 2016 21:59:19 +0800 Subject: x86: ioapic: add support for explicit EOI Some old Linux kernels (upstream before v4.0), or any released RHEL kernels has problem in sending APIC EOI when IR is enabled. Meanwhile, many of them only support explicit EOI for IOAPIC, which is only introduced in IOAPIC version 0x20. This patch provide a way to boost QEMU IOAPIC to version 0x20, in order for QEMU to correctly receive EOI messages. Without boosting IOAPIC version to 0x20, kernels before commit d32932d ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") will have trouble enabling both IR and level-triggered interrupt devices (like e1000). To upgrade IOAPIC to version 0x20, we need to specify: -global ioapic.version=0x20 To be compatible with old systems, 0x11 will still be the default IOAPIC version. Here 0x11 and 0x20 are the only versions to be supported. One thing to mention: this patch only applies to emulated IOAPIC. It does not affect kernel IOAPIC behavior. Signed-off-by: Peter Xu Message-Id: <1470059959-372-1-git-send-email-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- hw/intc/ioapic.c | 22 +++++++++++++++++++++- include/hw/i386/ioapic_internal.h | 4 ++-- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index a00d88210a..31791b0986 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -21,6 +21,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "monitor/monitor.h" #include "hw/hw.h" #include "hw/i386/pc.h" @@ -269,7 +270,7 @@ ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size) val = s->id << IOAPIC_ID_SHIFT; break; case IOAPIC_REG_VER: - val = IOAPIC_VERSION | + val = s->version | ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT); break; default: @@ -358,6 +359,13 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val, } } break; + case IOAPIC_EOI: + /* Explicit EOI is only supported for IOAPIC version 0x20 */ + if (size != 4 || s->version != 0x20) { + break; + } + ioapic_eoi_broadcast(val); + break; } ioapic_update_kvm_routes(s); @@ -391,6 +399,12 @@ static void ioapic_realize(DeviceState *dev, Error **errp) { IOAPICCommonState *s = IOAPIC_COMMON(dev); + if (s->version != 0x11 && s->version != 0x20) { + error_report("IOAPIC only supports version 0x11 or 0x20 " + "(default: 0x11)."); + exit(1); + } + memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s, "ioapic", 0x1000); @@ -401,6 +415,11 @@ static void ioapic_realize(DeviceState *dev, Error **errp) qemu_add_machine_init_done_notifier(&s->machine_done); } +static Property ioapic_properties[] = { + DEFINE_PROP_UINT8("version", IOAPICCommonState, version, 0x11), + DEFINE_PROP_END_OF_LIST(), +}; + static void ioapic_class_init(ObjectClass *klass, void *data) { IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); @@ -408,6 +427,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data) k->realize = ioapic_realize; dc->reset = ioapic_reset_common; + dc->props = ioapic_properties; } static const TypeInfo ioapic_info = { diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h index d89ea1b63b..a11d86de46 100644 --- a/include/hw/i386/ioapic_internal.h +++ b/include/hw/i386/ioapic_internal.h @@ -29,8 +29,6 @@ #define MAX_IOAPICS 1 -#define IOAPIC_VERSION 0x11 - #define IOAPIC_LVT_DEST_SHIFT 56 #define IOAPIC_LVT_DEST_IDX_SHIFT 48 #define IOAPIC_LVT_MASKED_SHIFT 16 @@ -71,6 +69,7 @@ #define IOAPIC_IOREGSEL 0x00 #define IOAPIC_IOWIN 0x10 +#define IOAPIC_EOI 0x40 #define IOAPIC_REG_ID 0x00 #define IOAPIC_REG_VER 0x01 @@ -109,6 +108,7 @@ struct IOAPICCommonState { uint32_t irr; uint64_t ioredtbl[IOAPIC_NUM_PINS]; Notifier machine_done; + uint8_t version; }; void ioapic_reset_common(DeviceState *dev); -- cgit v1.2.3-55-g7522