diff options
-rwxr-xr-x | configure | 22 | ||||
-rw-r--r-- | fsdev/9p-iov-marshal.c | 4 | ||||
-rw-r--r-- | fsdev/virtfs-proxy-helper.c | 11 | ||||
-rw-r--r-- | hw/9pfs/9p-handle.c | 5 | ||||
-rw-r--r-- | hw/9pfs/9p-local.c | 186 | ||||
-rw-r--r-- | hw/9pfs/9p-util.c | 43 | ||||
-rw-r--r-- | hw/9pfs/9p-util.h | 2 | ||||
-rw-r--r-- | hw/9pfs/9p.c | 51 | ||||
-rw-r--r-- | hw/9pfs/9p.h | 2 | ||||
-rw-r--r-- | hw/9pfs/virtio-9p-device.c | 8 | ||||
-rw-r--r-- | hw/9pfs/xen-9p-backend.c | 6 | ||||
-rw-r--r-- | include/sysemu/os-posix.h | 11 | ||||
-rw-r--r-- | include/sysemu/sysemu.h | 47 | ||||
-rw-r--r-- | linux-user/strace.c | 177 | ||||
-rw-r--r-- | linux-user/strace.list | 16 | ||||
-rw-r--r-- | linux-user/syscall.c | 161 | ||||
-rw-r--r-- | migration/colo.c | 1 | ||||
-rw-r--r-- | migration/migration.c | 1 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 1 | ||||
-rw-r--r-- | migration/ram.c | 23 | ||||
-rw-r--r-- | migration/savevm.c | 29 | ||||
-rw-r--r-- | migration/savevm.h | 41 | ||||
-rw-r--r-- | numa.c | 3 | ||||
-rw-r--r-- | target/sh4/cpu.h | 12 | ||||
-rw-r--r-- | target/sh4/helper.c | 28 | ||||
-rw-r--r-- | target/sh4/translate.c | 25 | ||||
-rw-r--r-- | tests/numa-test.c | 14 | ||||
-rw-r--r-- | util/oslib-posix.c | 47 |
28 files changed, 592 insertions, 385 deletions
@@ -3629,25 +3629,6 @@ if compile_prog "" "" ; then inotify1=yes fi -# check if utimensat and futimens are supported -utimens=no -cat > $TMPC << EOF -#define _ATFILE_SOURCE -#include <stddef.h> -#include <fcntl.h> -#include <sys/stat.h> - -int main(void) -{ - utimensat(AT_FDCWD, "foo", NULL, 0); - futimens(0, NULL); - return 0; -} -EOF -if compile_prog "" "" ; then - utimens=yes -fi - # check if pipe2 is there pipe2=no cat > $TMPC << EOF @@ -5434,9 +5415,6 @@ fi if test "$curses" = "yes" ; then echo "CONFIG_CURSES=y" >> $config_host_mak fi -if test "$utimens" = "yes" ; then - echo "CONFIG_UTIMENSAT=y" >> $config_host_mak -fi if test "$pipe2" = "yes" ; then echo "CONFIG_PIPE2=y" >> $config_host_mak fi diff --git a/fsdev/9p-iov-marshal.c b/fsdev/9p-iov-marshal.c index 1d16f8df4b..a1c9beddd2 100644 --- a/fsdev/9p-iov-marshal.c +++ b/fsdev/9p-iov-marshal.c @@ -168,7 +168,7 @@ ssize_t v9fs_iov_vunmarshal(struct iovec *out_sg, int out_num, size_t offset, break; } default: - break; + g_assert_not_reached(); } if (copied < 0) { return copied; @@ -281,7 +281,7 @@ ssize_t v9fs_iov_vmarshal(struct iovec *in_sg, int in_num, size_t offset, break; } default: - break; + g_assert_not_reached(); } if (copied < 0) { return copied; diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 54f7ad1c48..6c066ec9a0 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -945,7 +945,8 @@ static int process_requests(int sock) &spec[0].tv_sec, &spec[0].tv_nsec, &spec[1].tv_sec, &spec[1].tv_nsec); if (retval > 0) { - retval = qemu_utimens(path.data, spec); + retval = utimensat(AT_FDCWD, path.data, spec, + AT_SYMLINK_NOFOLLOW); if (retval < 0) { retval = -errno; } @@ -1129,14 +1130,14 @@ int main(int argc, char **argv) } } - if (chdir("/") < 0) { - do_perror("chdir"); - goto error; - } if (chroot(rpath) < 0) { do_perror("chroot"); goto error; } + if (chdir("/") < 0) { + do_perror("chdir"); + goto error; + } get_version = false; #ifdef FS_IOC_GETVERSION diff --git a/hw/9pfs/9p-handle.c b/hw/9pfs/9p-handle.c index 1687661bc9..9875f1894c 100644 --- a/hw/9pfs/9p-handle.c +++ b/hw/9pfs/9p-handle.c @@ -378,7 +378,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath *fs_path, const struct timespec *buf) { int ret; -#ifdef CONFIG_UTIMENSAT int fd; struct handle_data *data = (struct handle_data *)ctx->private; @@ -388,10 +387,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath *fs_path, } ret = futimens(fd, buf); close(fd); -#else - ret = -1; - errno = ENOSYS; -#endif return ret; } diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index a2486566af..1e78b7c9e9 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -53,13 +53,37 @@ int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, mode_t mode) { LocalData *data = fs_ctx->private; - - /* All paths are relative to the path data->mountfd points to */ - while (*path == '/') { - path++; + int fd = data->mountfd; + + while (*path && fd != -1) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(*path != '/'); + + head = g_strdup(path); + c = strchrnul(path, '/'); + if (*c) { + /* Intermediate path element */ + head[c - path] = 0; + path = c + 1; + next_fd = openat_dir(fd, head); + } else { + /* Rightmost path element */ + next_fd = openat_file(fd, head, flags, mode); + path = c; + } + g_free(head); + if (fd != data->mountfd) { + close_preserve_errno(fd); + } + fd = next_fd; } - return relative_openat_nofollow(data->mountfd, path, flags, mode); + assert(fd != data->mountfd); + return fd; } int local_opendir_nofollow(FsContext *fs_ctx, const char *path) @@ -83,6 +107,7 @@ static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) } #define VIRTFS_META_DIR ".virtfs_metadata" +#define VIRTFS_META_ROOT_FILE VIRTFS_META_DIR "_root" static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { @@ -119,13 +144,17 @@ static void local_mapped_file_attr(int dirfd, const char *name, char buf[ATTR_MAX]; int map_dirfd; - map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - if (map_dirfd == -1) { - return; - } + if (strcmp(name, ".")) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } - fp = local_fopenat(map_dirfd, name, "r"); - close_preserve_errno(map_dirfd); + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); + } else { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "r"); + } if (!fp) { return; } @@ -203,26 +232,38 @@ static int local_set_mapped_file_attrat(int dirfd, const char *name, int ret; char buf[ATTR_MAX]; int uid = -1, gid = -1, mode = -1, rdev = -1; - int map_dirfd; - - ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); - if (ret < 0 && errno != EEXIST) { - return -1; - } - - map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - if (map_dirfd == -1) { - return -1; - } + int map_dirfd = -1, map_fd; + bool is_root = !strcmp(name, "."); + + if (is_root) { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "r"); + if (!fp) { + if (errno == ENOENT) { + goto update_map_file; + } else { + return -1; + } + } + } else { + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - fp = local_fopenat(map_dirfd, name, "r"); - if (!fp) { - if (errno == ENOENT) { - goto update_map_file; - } else { - close_preserve_errno(map_dirfd); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { return -1; } + + fp = local_fopenat(map_dirfd, name, "r"); + if (!fp) { + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { @@ -240,12 +281,26 @@ static int local_set_mapped_file_attrat(int dirfd, const char *name, fclose(fp); update_map_file: - fp = local_fopenat(map_dirfd, name, "w"); - close_preserve_errno(map_dirfd); + if (is_root) { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "w"); + } else { + fp = local_fopenat(map_dirfd, name, "w"); + /* We can't go this far with map_dirfd not being a valid file descriptor + * but some versions of gcc aren't smart enough to see it. + */ + if (map_dirfd != -1) { + close_preserve_errno(map_dirfd); + } + } if (!fp) { return -1; } + map_fd = fileno(fp); + assert(map_fd != -1); + ret = fchmod(map_fd, 0600); + assert(ret == 0); + if (credp->fc_uid != -1) { uid = credp->fc_uid; } @@ -454,7 +509,8 @@ static off_t local_telldir(FsContext *ctx, V9fsFidOpenState *fs) static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name) { - return !strcmp(name, VIRTFS_META_DIR); + return + !strcmp(name, VIRTFS_META_DIR) || !strcmp(name, VIRTFS_META_ROOT_FILE); } static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs) @@ -471,7 +527,7 @@ again: entry->d_type = DT_UNKNOWN; } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { if (local_is_mapped_file_metadata(ctx, entry->d_name)) { - /* skip the meta data directory */ + /* skip the meta data */ goto again; } entry->d_type = DT_UNKNOWN; @@ -992,6 +1048,14 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { int map_dirfd; + /* We need to remove the metadata as well: + * - the metadata directory if we're removing a directory + * - the metadata file in the parent's metadata directory + * + * If any of these are missing (ie, ENOENT) then we're probably + * trying to remove something that wasn't created in mapped-file + * mode. We just ignore the error. + */ if (flags == AT_REMOVEDIR) { int fd; @@ -999,32 +1063,20 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, if (fd == -1) { goto err_out; } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); close_preserve_errno(fd); if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ goto err_out; } } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - ret = unlinkat(map_dirfd, name, 0); - close_preserve_errno(map_dirfd); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ + if (map_dirfd != -1) { + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_out; + } + } else if (errno != ENOENT) { goto err_out; } } @@ -1138,14 +1190,32 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path, } if (dir_path) { - v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); - } else if (strcmp(name, "/")) { - v9fs_path_sprintf(target, "%s", name); + if (!strcmp(name, ".")) { + /* "." relative to "foo/bar" is "foo/bar" */ + v9fs_path_copy(target, dir_path); + } else if (!strcmp(name, "..")) { + if (!strcmp(dir_path->data, ".")) { + /* ".." relative to the root is "." */ + v9fs_path_sprintf(target, "."); + } else { + char *tmp = g_path_get_dirname(dir_path->data); + /* Symbolic links are resolved by the client. We can assume + * that ".." relative to "foo/bar" is equivalent to "foo" + */ + v9fs_path_sprintf(target, "%s", tmp); + g_free(tmp); + } + } else { + assert(!strchr(name, '/')); + v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); + } + } else if (!strcmp(name, "/") || !strcmp(name, ".") || + !strcmp(name, "..")) { + /* This is the root fid */ + v9fs_path_sprintf(target, "."); } else { - /* We want the path of the export root to be relative, otherwise - * "*at()" syscalls would treat it as "/" in the host. - */ - v9fs_path_sprintf(target, "%s", "."); + assert(!strchr(name, '/')); + v9fs_path_sprintf(target, "./%s", name); } return 0; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c index fdb4d57376..f709c27a1f 100644 --- a/hw/9pfs/9p-util.c +++ b/hw/9pfs/9p-util.c @@ -14,49 +14,6 @@ #include "qemu/xattr.h" #include "9p-util.h" -int relative_openat_nofollow(int dirfd, const char *path, int flags, - mode_t mode) -{ - int fd; - - fd = dup(dirfd); - if (fd == -1) { - return -1; - } - - while (*path) { - const char *c; - int next_fd; - char *head; - - /* Only relative paths without consecutive slashes */ - assert(path[0] != '/'); - - head = g_strdup(path); - c = strchr(path, '/'); - if (c) { - head[c - path] = 0; - next_fd = openat_dir(fd, head); - } else { - next_fd = openat_file(fd, head, flags, mode); - } - g_free(head); - if (next_fd == -1) { - close_preserve_errno(fd); - return -1; - } - close(fd); - fd = next_fd; - - if (!c) { - break; - } - path = c + 1; - } - - return fd; -} - ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, void *value, size_t size) { diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 517027c520..91299a24b8 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -50,8 +50,6 @@ static inline int openat_file(int dirfd, const char *name, int flags, return fd; } -int relative_openat_nofollow(int dirfd, const char *path, int flags, - mode_t mode); ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, void *value, size_t size); int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index ab3e22f231..96d2683348 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -65,11 +65,6 @@ ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) return ret; } -static void pdu_push_and_notify(V9fsPDU *pdu) -{ - pdu->s->transport->push_and_notify(pdu); -} - static int omode_to_uflags(int8_t mode) { int ret = 0; @@ -668,7 +663,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) pdu->size = len; pdu->id = id; - pdu_push_and_notify(pdu); + pdu->s->transport->push_and_notify(pdu); /* Now wakeup anybody waiting in flush for this request */ if (!qemu_co_queue_next(&pdu->complete)) { @@ -2576,7 +2571,10 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, err = -EINVAL; goto out; } - v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); + err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); + if (err < 0) { + goto out; + } } else { old_name = fidp->path.data; end = strrchr(old_name, '/'); @@ -2588,8 +2586,11 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, new_name = g_malloc0(end - old_name + name->size + 1); strncat(new_name, old_name, end - old_name); strncat(new_name + (end - old_name), name->data, name->size); - v9fs_co_name_to_path(pdu, NULL, new_name, &new_path); + err = v9fs_co_name_to_path(pdu, NULL, new_name, &new_path); g_free(new_name); + if (err < 0) { + goto out; + } } err = v9fs_co_rename(pdu, &fidp->path, &new_path); if (err < 0) { @@ -2669,20 +2670,26 @@ out_nofid: v9fs_string_free(&name); } -static void coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, - V9fsString *old_name, - V9fsPath *newdir, - V9fsString *new_name) +static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, + V9fsString *old_name, + V9fsPath *newdir, + V9fsString *new_name) { V9fsFidState *tfidp; V9fsPath oldpath, newpath; V9fsState *s = pdu->s; - + int err; v9fs_path_init(&oldpath); v9fs_path_init(&newpath); - v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); - v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); + err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); + if (err < 0) { + goto out; + } + err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); + if (err < 0) { + goto out; + } /* * Fixup fid's pointing to the old name to @@ -2694,8 +2701,10 @@ static void coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data)); } } +out: v9fs_path_free(&oldpath); v9fs_path_free(&newpath); + return err; } static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, @@ -2729,8 +2738,8 @@ static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, } if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { /* Only for path based fid we need to do the below fixup */ - v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, - &newdirfidp->path, new_name); + err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, + &newdirfidp->path, new_name); } out: if (olddirfidp) { @@ -3446,12 +3455,16 @@ static inline bool is_read_only_op(V9fsPDU *pdu) } } -void pdu_submit(V9fsPDU *pdu) +void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) { Coroutine *co; CoroutineEntry *handler; V9fsState *s = pdu->s; + pdu->size = le32_to_cpu(hdr->size_le); + pdu->id = hdr->id; + pdu->tag = le16_to_cpu(hdr->tag_le); + if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) || (pdu_co_handlers[pdu->id] == NULL)) { handler = v9fs_op_not_supp; @@ -3462,6 +3475,8 @@ void pdu_submit(V9fsPDU *pdu) if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) { handler = v9fs_fs_ro; } + + qemu_co_queue_init(&pdu->complete); co = qemu_coroutine_create(handler, pdu); qemu_coroutine_enter(co); } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 5312d8a424..c886ba78d2 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -347,7 +347,7 @@ ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); V9fsPDU *pdu_alloc(V9fsState *s); void pdu_free(V9fsPDU *pdu); -void pdu_submit(V9fsPDU *pdu); +void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr); void v9fs_reset(V9fsState *s); struct V9fsTransport { diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 3782f43702..245abd8aae 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -70,13 +70,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) goto out_free_req; } - pdu->size = le32_to_cpu(out.size_le); - - pdu->id = out.id; - pdu->tag = le16_to_cpu(out.tag_le); - - qemu_co_queue_init(&pdu->complete); - pdu_submit(pdu); + pdu_submit(pdu, &out); } return; diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 5df97c90fa..922cc967be 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -243,14 +243,10 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) /* cannot fail, because we only handle one request per ring at a time */ pdu = pdu_alloc(&ring->priv->state); - pdu->size = le32_to_cpu(h.size_le); - pdu->id = h.id; - pdu->tag = le32_to_cpu(h.tag_le); ring->out_size = le32_to_cpu(h.size_le); ring->out_cons = cons + le32_to_cpu(h.size_le); - qemu_co_queue_init(&pdu->complete); - pdu_submit(pdu); + pdu_submit(pdu, &h); return 0; } diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 900bdcb45a..629c8c648b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -51,17 +51,6 @@ int os_mlock(void); typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) -#ifndef CONFIG_UTIMENSAT -#ifndef UTIME_NOW -# define UTIME_NOW ((1l << 30) - 1l) -#endif -#ifndef UTIME_OMIT -# define UTIME_OMIT ((1l << 30) - 2l) -#endif -#endif -typedef struct timespec qemu_timespec; -int qemu_utimens(const char *path, const qemu_timespec *times); - bool is_daemonized(void); /** diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 69046ebf1b..723c8dcb1a 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -97,53 +97,6 @@ int load_vmstate(const char *name, Error **errp); void qemu_announce_self(void); -/* Subcommands for QEMU_VM_COMMAND */ -enum qemu_vm_cmd { - MIG_CMD_INVALID = 0, /* Must be 0 */ - MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ - MIG_CMD_PING, /* Request a PONG on the RP */ - - MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just - warn we might want to do PC */ - MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming - pages as it's running. */ - MIG_CMD_POSTCOPY_RUN, /* Start execution */ - - MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that - were previously sent during - precopy but are dirty. */ - MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ - MIG_CMD_MAX -}; - -#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24) - -bool qemu_savevm_state_blocked(Error **errp); -void qemu_savevm_state_begin(QEMUFile *f); -void qemu_savevm_state_header(QEMUFile *f); -int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); -void qemu_savevm_state_cleanup(void); -void qemu_savevm_state_complete_postcopy(QEMUFile *f); -void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); -void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, - uint64_t *res_non_postcopiable, - uint64_t *res_postcopiable); -void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command, - uint16_t len, uint8_t *data); -void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); -void qemu_savevm_send_open_return_path(QEMUFile *f); -int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); -void qemu_savevm_send_postcopy_advise(QEMUFile *f); -void qemu_savevm_send_postcopy_listen(QEMUFile *f); -void qemu_savevm_send_postcopy_run(QEMUFile *f); - -void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, - uint16_t len, - uint64_t *start_list, - uint64_t *length_list); - -int qemu_loadvm_state(QEMUFile *f); - extern int autostart; typedef enum { diff --git a/linux-user/strace.c b/linux-user/strace.c index 8fb1b6e252..d821d165ff 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -188,6 +188,93 @@ static void print_si_code(int arg) gemu_log("%s", codename); } +static void get_target_siginfo(target_siginfo_t *tinfo, + const target_siginfo_t *info) +{ + abi_ulong sival_ptr; + + int sig; + int si_errno; + int si_code; + int si_type; + + __get_user(sig, &info->si_signo); + __get_user(si_errno, &tinfo->si_errno); + __get_user(si_code, &info->si_code); + + tinfo->si_signo = sig; + tinfo->si_errno = si_errno; + tinfo->si_code = si_code; + + /* Ensure we don't leak random junk to the guest later */ + memset(tinfo->_sifields._pad, 0, sizeof(tinfo->_sifields._pad)); + + /* This is awkward, because we have to use a combination of + * the si_code and si_signo to figure out which of the union's + * members are valid. (Within the host kernel it is always possible + * to tell, but the kernel carefully avoids giving userspace the + * high 16 bits of si_code, so we don't have the information to + * do this the easy way...) We therefore make our best guess, + * bearing in mind that a guest can spoof most of the si_codes + * via rt_sigqueueinfo() if it likes. + * + * Once we have made our guess, we record it in the top 16 bits of + * the si_code, so that print_siginfo() later can use it. + * print_siginfo() will strip these top bits out before printing + * the si_code. + */ + + switch (si_code) { + case SI_USER: + case SI_TKILL: + case SI_KERNEL: + /* Sent via kill(), tkill() or tgkill(), or direct from the kernel. + * These are the only unspoofable si_code values. + */ + __get_user(tinfo->_sifields._kill._pid, &info->_sifields._kill._pid); + __get_user(tinfo->_sifields._kill._uid, &info->_sifields._kill._uid); + si_type = QEMU_SI_KILL; + break; + default: + /* Everything else is spoofable. Make best guess based on signal */ + switch (sig) { + case TARGET_SIGCHLD: + __get_user(tinfo->_sifields._sigchld._pid, + &info->_sifields._sigchld._pid); + __get_user(tinfo->_sifields._sigchld._uid, + &info->_sifields._sigchld._uid); + __get_user(tinfo->_sifields._sigchld._status, + &info->_sifields._sigchld._status); + __get_user(tinfo->_sifields._sigchld._utime, + &info->_sifields._sigchld._utime); + __get_user(tinfo->_sifields._sigchld._stime, + &info->_sifields._sigchld._stime); + si_type = QEMU_SI_CHLD; + break; + case TARGET_SIGIO: + __get_user(tinfo->_sifields._sigpoll._band, + &info->_sifields._sigpoll._band); + __get_user(tinfo->_sifields._sigpoll._fd, + &info->_sifields._sigpoll._fd); + si_type = QEMU_SI_POLL; + break; + default: + /* Assume a sigqueue()/mq_notify()/rt_sigqueueinfo() source. */ + __get_user(tinfo->_sifields._rt._pid, &info->_sifields._rt._pid); + __get_user(tinfo->_sifields._rt._uid, &info->_sifields._rt._uid); + /* XXX: potential problem if 64 bit */ + __get_user(sival_ptr, &info->_sifields._rt._sigval.sival_ptr); + tinfo->_sifields._rt._sigval.sival_ptr = sival_ptr; + + si_type = QEMU_SI_RT; + break; + } + break; + } + + tinfo->si_code = deposit32(si_code, 16, 16, si_type); +} + static void print_siginfo(const target_siginfo_t *tinfo) { /* Print a target_siginfo_t in the format desired for printing @@ -206,26 +293,26 @@ static void print_siginfo(const target_siginfo_t *tinfo) switch (si_type) { case QEMU_SI_KILL: - gemu_log(", si_pid = %u, si_uid = %u", + gemu_log(", si_pid=%u, si_uid=%u", (unsigned int)tinfo->_sifields._kill._pid, (unsigned int)tinfo->_sifields._kill._uid); break; case QEMU_SI_TIMER: - gemu_log(", si_timer1 = %u, si_timer2 = %u", + gemu_log(", si_timer1=%u, si_timer2=%u", tinfo->_sifields._timer._timer1, tinfo->_sifields._timer._timer2); break; case QEMU_SI_POLL: - gemu_log(", si_band = %d, si_fd = %d", + gemu_log(", si_band=%d, si_fd=%d", tinfo->_sifields._sigpoll._band, tinfo->_sifields._sigpoll._fd); break; case QEMU_SI_FAULT: - gemu_log(", si_addr = "); + gemu_log(", si_addr="); print_pointer(tinfo->_sifields._sigfault._addr, 1); break; case QEMU_SI_CHLD: - gemu_log(", si_pid = %u, si_uid = %u, si_status = %d" + gemu_log(", si_pid=%u, si_uid=%u, si_status=%d" ", si_utime=" TARGET_ABI_FMT_ld ", si_stime=" TARGET_ABI_FMT_ld, (unsigned int)(tinfo->_sifields._sigchld._pid), @@ -235,7 +322,7 @@ static void print_siginfo(const target_siginfo_t *tinfo) tinfo->_sifields._sigchld._stime); break; case QEMU_SI_RT: - gemu_log(", si_pid = %u, si_uid = %u, si_sigval = " TARGET_ABI_FMT_ld, + gemu_log(", si_pid=%u, si_uid=%u, si_sigval=" TARGET_ABI_FMT_ld, (unsigned int)tinfo->_sifields._rt._pid, (unsigned int)tinfo->_sifields._rt._uid, tinfo->_sifields._rt._sigval.sival_ptr); @@ -1901,6 +1988,57 @@ print_rt_sigprocmask(const struct syscallname *name, } #endif +#ifdef TARGET_NR_rt_sigqueueinfo +static void +print_rt_sigqueueinfo(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + void *p; + target_siginfo_t uinfo; + + print_syscall_prologue(name); + print_raw_param("%d", arg0, 0); + print_signal(arg1, 0); + p = lock_user(VERIFY_READ, arg2, sizeof(target_siginfo_t), 1); + if (p) { + get_target_siginfo(&uinfo, p); + print_siginfo(&uinfo); + + unlock_user(p, arg2, 0); + } else { + print_pointer(arg2, 1); + } + print_syscall_epilogue(name); +} +#endif + +#ifdef TARGET_NR_rt_tgsigqueueinfo +static void +print_rt_tgsigqueueinfo(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + void *p; + target_siginfo_t uinfo; + + print_syscall_prologue(name); + print_raw_param("%d", arg0, 0); + print_raw_param("%d", arg1, 0); + print_signal(arg2, 0); + p = lock_user(VERIFY_READ, arg3, sizeof(target_siginfo_t), 1); + if (p) { + get_target_siginfo(&uinfo, p); + print_siginfo(&uinfo); + + unlock_user(p, arg3, 0); + } else { + print_pointer(arg3, 1); + } + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_syslog static void print_syslog_action(abi_ulong arg, int last) @@ -2415,6 +2553,33 @@ print_kill(const struct syscallname *name, } #endif +#ifdef TARGET_NR_tkill +static void +print_tkill(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_raw_param("%d", arg0, 0); + print_signal(arg1, 1); + print_syscall_epilogue(name); +} +#endif + +#ifdef TARGET_NR_tgkill +static void +print_tgkill(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_raw_param("%d", arg0, 0); + print_raw_param("%d", arg1, 0); + print_signal(arg2, 1); + print_syscall_epilogue(name); +} +#endif + /* * An array of all of the syscalls we know about */ diff --git a/linux-user/strace.list b/linux-user/strace.list index 3b1282ec1a..a91e33f7e5 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -290,7 +290,7 @@ { TARGET_NR_getegid32, "getegid32" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_geteuid -{ TARGET_NR_geteuid, "geteuid" , NULL, NULL, NULL }, +{ TARGET_NR_geteuid, "geteuid" , "%s()", NULL, NULL }, #endif #ifdef TARGET_NR_geteuid32 { TARGET_NR_geteuid32, "geteuid32" , NULL, NULL, NULL }, @@ -338,7 +338,7 @@ { TARGET_NR_getpmsg, "getpmsg" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_getppid -{ TARGET_NR_getppid, "getppid" , NULL, NULL, NULL }, +{ TARGET_NR_getppid, "getppid" , "%s()", NULL, NULL }, #endif #ifdef TARGET_NR_getpriority { TARGET_NR_getpriority, "getpriority", "%s(%#x,%#x)", NULL, NULL }, @@ -381,13 +381,13 @@ NULL, NULL }, #endif #ifdef TARGET_NR_gettid -{ TARGET_NR_gettid, "gettid" , NULL, NULL, NULL }, +{ TARGET_NR_gettid, "gettid" , "%s()", NULL, NULL }, #endif #ifdef TARGET_NR_gettimeofday { TARGET_NR_gettimeofday, "gettimeofday" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_getuid -{ TARGET_NR_getuid, "getuid" , NULL, NULL, NULL }, +{ TARGET_NR_getuid, "getuid" , "%s()", NULL, NULL }, #endif #ifdef TARGET_NR_getuid32 { TARGET_NR_getuid32, "getuid32" , NULL, NULL, NULL }, @@ -1155,7 +1155,7 @@ { TARGET_NR_rt_sigprocmask, "rt_sigprocmask" , NULL, print_rt_sigprocmask, NULL }, #endif #ifdef TARGET_NR_rt_sigqueueinfo -{ TARGET_NR_rt_sigqueueinfo, "rt_sigqueueinfo" , NULL, NULL, NULL }, +{ TARGET_NR_rt_sigqueueinfo, "rt_sigqueueinfo" , NULL, print_rt_sigqueueinfo, NULL }, #endif #ifdef TARGET_NR_rt_sigreturn { TARGET_NR_rt_sigreturn, "rt_sigreturn" , NULL, NULL, NULL }, @@ -1167,7 +1167,7 @@ { TARGET_NR_rt_sigtimedwait, "rt_sigtimedwait" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_rt_tgsigqueueinfo -{ TARGET_NR_rt_tgsigqueueinfo, "rt_tgsigqueueinfo" , NULL, NULL, NULL }, +{ TARGET_NR_rt_tgsigqueueinfo, "rt_tgsigqueueinfo" , NULL, print_rt_tgsigqueueinfo, NULL }, #endif #ifdef TARGET_NR_sched_getaffinity { TARGET_NR_sched_getaffinity, "sched_getaffinity" , NULL, NULL, NULL }, @@ -1498,7 +1498,7 @@ { TARGET_NR_tee, "tee" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_tgkill -{ TARGET_NR_tgkill, "tgkill" , NULL, NULL, NULL }, +{ TARGET_NR_tgkill, "tgkill" , NULL, print_tgkill, NULL }, #endif #ifdef TARGET_NR_time { TARGET_NR_time, "time" , NULL, NULL, NULL }, @@ -1534,7 +1534,7 @@ { TARGET_NR_times, "times" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_tkill -{ TARGET_NR_tkill, "tkill" , NULL, NULL, NULL }, +{ TARGET_NR_tkill, "tkill" , NULL, print_tkill, NULL }, #endif #ifdef TARGET_NR_truncate { TARGET_NR_truncate, "truncate" , NULL, NULL, NULL }, diff --git a/linux-user/syscall.c b/linux-user/syscall.c index cec8428589..925ae11ea6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -238,6 +238,7 @@ static type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, \ #define __NR_sys_getdents64 __NR_getdents64 #define __NR_sys_getpriority __NR_getpriority #define __NR_sys_rt_sigqueueinfo __NR_rt_sigqueueinfo +#define __NR_sys_rt_tgsigqueueinfo __NR_rt_tgsigqueueinfo #define __NR_sys_syslog __NR_syslog #define __NR_sys_futex __NR_futex #define __NR_sys_inotify_init __NR_inotify_init @@ -274,7 +275,9 @@ _syscall3(int, sys_getdents64, uint, fd, struct linux_dirent64 *, dirp, uint, co _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, loff_t *, res, uint, wh); #endif -_syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo) +_syscall3(int, sys_rt_sigqueueinfo, pid_t, pid, int, sig, siginfo_t *, uinfo) +_syscall4(int, sys_rt_tgsigqueueinfo, pid_t, pid, pid_t, tid, int, sig, + siginfo_t *, uinfo) _syscall3(int,sys_syslog,int,type,char*,bufp,int,len) #ifdef __NR_exit_group _syscall1(int,exit_group,int,error_code) @@ -7358,52 +7361,19 @@ int host_to_target_waitstatus(int status) static int open_self_cmdline(void *cpu_env, int fd) { - int fd_orig = -1; - bool word_skipped = false; - - fd_orig = open("/proc/self/cmdline", O_RDONLY); - if (fd_orig < 0) { - return fd_orig; - } + CPUState *cpu = ENV_GET_CPU((CPUArchState *)cpu_env); + struct linux_binprm *bprm = ((TaskState *)cpu->opaque)->bprm; + int i; - while (true) { - ssize_t nb_read; - char buf[128]; - char *cp_buf = buf; + for (i = 0; i < bprm->argc; i++) { + size_t len = strlen(bprm->argv[i]) + 1; - nb_read = read(fd_orig, buf, sizeof(buf)); - if (nb_read < 0) { - int e = errno; - fd_orig = close(fd_orig); - errno = e; + if (write(fd, bprm->argv[i], len) != len) { return -1; - } else if (nb_read == 0) { - break; - } - - if (!word_skipped) { - /* Skip the first string, which is the path to qemu-*-static - instead of the actual command. */ - cp_buf = memchr(buf, 0, nb_read); - if (cp_buf) { - /* Null byte found, skip one string */ - cp_buf++; - nb_read -= cp_buf - buf; - word_skipped = true; - } - } - - if (word_skipped) { - if (write(fd, cp_buf, nb_read) != nb_read) { - int e = errno; - close(fd_orig); - errno = e; - return -1; - } } } - return close(fd_orig); + return 0; } static int open_self_maps(void *cpu_env, int fd) @@ -7671,6 +7641,55 @@ static target_timer_t get_timer_id(abi_long arg) return timerid; } +static abi_long swap_data_eventfd(void *buf, size_t len) +{ + uint64_t *counter = buf; + int i; + + if (len < sizeof(uint64_t)) { + return -EINVAL; + } + + for (i = 0; i < len; i += sizeof(uint64_t)) { + *counter = tswap64(*counter); + counter++; + } + + return len; +} + +static TargetFdTrans target_eventfd_trans = { + .host_to_target_data = swap_data_eventfd, + .target_to_host_data = swap_data_eventfd, +}; + +#if (defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)) || \ + (defined(CONFIG_INOTIFY1) && defined(TARGET_NR_inotify_init1) && \ + defined(__NR_inotify_init1)) +static abi_long host_to_target_data_inotify(void *buf, size_t len) +{ + struct inotify_event *ev; + int i; + uint32_t name_len; + + for (i = 0; i < len; i += sizeof(struct inotify_event) + name_len) { + ev = (struct inotify_event *)((char *)buf + i); + name_len = ev->len; + + ev->wd = tswap32(ev->wd); + ev->mask = tswap32(ev->mask); + ev->cookie = tswap32(ev->cookie); + ev->len = tswap32(name_len); + } + + return len; +} + +static TargetFdTrans target_inotify_trans = { + .host_to_target_data = host_to_target_data_inotify, +}; +#endif + /* do_syscall() should always have a single exit point at the end so that actions, such as logging of syscall results, can be performed. All errnos that do_syscall() returns must be -TARGET_<errcode>. */ @@ -7767,7 +7786,17 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_write: if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1))) goto efault; - ret = get_errno(safe_write(arg1, p, arg3)); + if (fd_trans_target_to_host_data(arg1)) { + void *copy = g_malloc(arg3); + memcpy(copy, p, arg3); + ret = fd_trans_target_to_host_data(arg1)(copy, arg3); + if (ret >= 0) { + ret = get_errno(safe_write(arg1, copy, ret)); + } + g_free(copy); + } else { + ret = get_errno(safe_write(arg1, p, arg3)); + } unlock_user(p, arg2, 0); break; #ifdef TARGET_NR_open @@ -7926,8 +7955,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, envc++; } - argp = alloca((argc + 1) * sizeof(void *)); - envp = alloca((envc + 1) * sizeof(void *)); + argp = g_new0(char *, argc + 1); + envp = g_new0(char *, envc + 1); for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { @@ -7988,6 +8017,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; unlock_user(*q, addr, 0); } + + g_free(argp); + g_free(envp); } break; case TARGET_NR_chdir: @@ -8592,17 +8624,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_ssetmask /* not on alpha */ case TARGET_NR_ssetmask: { - sigset_t set, oset, cur_set; + sigset_t set, oset; abi_ulong target_set = arg1; - /* We only have one word of the new mask so we must read - * the rest of it with do_sigprocmask() and OR in this word. - * We are guaranteed that a do_sigprocmask() that only queries - * the signal mask will not fail. - */ - ret = do_sigprocmask(0, NULL, &cur_set); - assert(!ret); target_to_host_old_sigset(&set, &target_set); - sigorset(&set, &set, &cur_set); ret = do_sigprocmask(SIG_SETMASK, &set, &oset); if (!ret) { host_to_target_old_sigset(&target_set, &oset); @@ -8847,10 +8871,23 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, goto efault; } target_to_host_siginfo(&uinfo, p); - unlock_user(p, arg1, 0); + unlock_user(p, arg3, 0); ret = get_errno(sys_rt_sigqueueinfo(arg1, arg2, &uinfo)); } break; + case TARGET_NR_rt_tgsigqueueinfo: + { + siginfo_t uinfo; + + p = lock_user(VERIFY_READ, arg4, sizeof(target_siginfo_t), 1); + if (!p) { + goto efault; + } + target_to_host_siginfo(&uinfo, p); + unlock_user(p, arg4, 0); + ret = get_errno(sys_rt_tgsigqueueinfo(arg1, arg2, arg3, &uinfo)); + } + break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: if (block_signals()) { @@ -11229,6 +11266,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_fadvise64_64 case TARGET_NR_fadvise64_64: +#if defined(TARGET_PPC) + /* 6 args: fd, advice, offset (high, low), len (high, low) */ + ret = arg2; + arg2 = arg3; + arg3 = arg4; + arg4 = arg5; + arg5 = arg6; + arg6 = ret; +#else /* 6 args: fd, offset (high, low), len (high, low), advice */ if (regpairs_aligned(cpu_env)) { /* offset is in (3,4), len in (5,6) and advice in 7 */ @@ -11238,6 +11284,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, arg5 = arg6; arg6 = arg7; } +#endif ret = -host_to_target_errno(posix_fadvise(arg1, target_offset64(arg2, arg3), target_offset64(arg4, arg5), @@ -11694,6 +11741,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #if defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init) case TARGET_NR_inotify_init: ret = get_errno(sys_inotify_init()); + fd_trans_register(ret, &target_inotify_trans); break; #endif #ifdef CONFIG_INOTIFY1 @@ -11701,6 +11749,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_inotify_init1: ret = get_errno(sys_inotify_init1(target_to_host_bitmask(arg1, fcntl_flags_tbl))); + fd_trans_register(ret, &target_inotify_trans); break; #endif #endif @@ -11866,7 +11915,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #if defined(TARGET_NR_eventfd) case TARGET_NR_eventfd: ret = get_errno(eventfd(arg1, 0)); - fd_trans_unregister(ret); + fd_trans_register(ret, &target_eventfd_trans); break; #endif #if defined(TARGET_NR_eventfd2) @@ -11880,7 +11929,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, host_flags |= O_CLOEXEC; } ret = get_errno(eventfd(arg1, host_flags)); - fd_trans_unregister(ret); + fd_trans_register(ret, &target_eventfd_trans); break; } #endif diff --git a/migration/colo.c b/migration/colo.c index 8c13a3c3f1..3dd1390573 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -16,6 +16,7 @@ #include "qemu-file-channel.h" #include "migration/migration.h" #include "migration/qemu-file.h" +#include "savevm.h" #include "migration/colo.h" #include "migration/block.h" #include "io/channel-buffer.h" diff --git a/migration/migration.c b/migration/migration.c index ad29e53400..7087d1abbb 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -19,6 +19,7 @@ #include "qemu/main-loop.h" #include "migration/blocker.h" #include "migration/migration.h" +#include "savevm.h" #include "qemu-file-channel.h" #include "migration/qemu-file.h" #include "migration/vmstate.h" diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a37620dac6..3f9ae1bff2 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -22,6 +22,7 @@ #include "exec/target_page.h" #include "migration/migration.h" #include "migration/qemu-file.h" +#include "savevm.h" #include "postcopy-ram.h" #include "sysemu/sysemu.h" #include "sysemu/balloon.h" diff --git a/migration/ram.c b/migration/ram.c index c07a9c08d9..26e03a5dfa 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -673,10 +673,6 @@ static void migration_bitmap_sync(RAMState *rs) rs->bitmap_sync_count++; - if (!rs->bytes_xfer_prev) { - rs->bytes_xfer_prev = ram_bytes_transferred(); - } - if (!rs->time_last_bitmap_sync) { rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } @@ -698,23 +694,25 @@ static void migration_bitmap_sync(RAMState *rs) /* more than 1 second = 1000 millisecons */ if (end_time > rs->time_last_bitmap_sync + 1000) { + /* calculate period counters */ + rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 + / (end_time - rs->time_last_bitmap_sync); + bytes_xfer_now = ram_bytes_transferred(); + if (migrate_auto_converge()) { /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. amount of bytes that just got transferred since the last time we were in this routine. If that happens twice, start or increase throttling */ - bytes_xfer_now = ram_bytes_transferred(); - if (rs->dirty_pages_rate && - (rs->num_dirty_pages_period * TARGET_PAGE_SIZE > + if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && - (rs->dirty_rate_high_cnt++ >= 2)) { + (++rs->dirty_rate_high_cnt >= 2)) { trace_migration_throttle(); rs->dirty_rate_high_cnt = 0; mig_throttle_guest_down(); - } - rs->bytes_xfer_prev = bytes_xfer_now; + } } if (migrate_use_xbzrle()) { @@ -727,10 +725,11 @@ static void migration_bitmap_sync(RAMState *rs) rs->iterations_prev = rs->iterations; rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss; } - rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 - / (end_time - rs->time_last_bitmap_sync); + + /* reset period counters */ rs->time_last_bitmap_sync = end_time; rs->num_dirty_pages_period = 0; + rs->bytes_xfer_prev = bytes_xfer_now; } if (migrate_use_events()) { qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL); diff --git a/migration/savevm.c b/migration/savevm.c index a4532b6b58..a2d4f9c53c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -36,6 +36,7 @@ #include "qemu/timer.h" #include "migration/migration.h" #include "qemu-file-channel.h" +#include "savevm.h" #include "postcopy-ram.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" @@ -63,6 +64,26 @@ const unsigned int postcopy_ram_discard_version = 0; static bool skip_section_footers; +/* Subcommands for QEMU_VM_COMMAND */ +enum qemu_vm_cmd { + MIG_CMD_INVALID = 0, /* Must be 0 */ + MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ + MIG_CMD_PING, /* Request a PONG on the RP */ + + MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just + warn we might want to do PC */ + MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming + pages as it's running. */ + MIG_CMD_POSTCOPY_RUN, /* Start execution */ + + MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that + were previously sent during + precopy but are dirty. */ + MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ + MIG_CMD_MAX +}; + +#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24) static struct mig_cmd_args { ssize_t len; /* -1 = variable */ const char *name; @@ -807,10 +828,10 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) * @len: Length of associated data * @data: Data associated with command. */ -void qemu_savevm_command_send(QEMUFile *f, - enum qemu_vm_cmd command, - uint16_t len, - uint8_t *data) +static void qemu_savevm_command_send(QEMUFile *f, + enum qemu_vm_cmd command, + uint16_t len, + uint8_t *data) { trace_savevm_command_send(command, len); qemu_put_byte(f, QEMU_VM_COMMAND); diff --git a/migration/savevm.h b/migration/savevm.h new file mode 100644 index 0000000000..eb4487771a --- /dev/null +++ b/migration/savevm.h @@ -0,0 +1,41 @@ +/* + * QEMU save vm functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009-2017 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef MIGRATION_SAVEVM_H +#define MIGRATION_SAVEVM_H + +bool qemu_savevm_state_blocked(Error **errp); +void qemu_savevm_state_begin(QEMUFile *f); +void qemu_savevm_state_header(QEMUFile *f); +int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); +void qemu_savevm_state_cleanup(void); +void qemu_savevm_state_complete_postcopy(QEMUFile *f); +void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); +void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, + uint64_t *res_non_postcopiable, + uint64_t *res_postcopiable); +void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); +void qemu_savevm_send_open_return_path(QEMUFile *f); +int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); +void qemu_savevm_send_postcopy_advise(QEMUFile *f); +void qemu_savevm_send_postcopy_listen(QEMUFile *f); +void qemu_savevm_send_postcopy_run(QEMUFile *f); + +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *length_list); + +int qemu_loadvm_state(QEMUFile *f); + +#endif @@ -231,8 +231,7 @@ static void parse_numa_distance(NumaDistOptions *dist, Error **errp) if (src >= MAX_NODES || dst >= MAX_NODES) { error_setg(errp, - "Invalid node %" PRIu16 - ", max possible could be %" PRIu16, + "Invalid node %d, max possible could be %d", MAX(src, dst), MAX_NODES); return; } diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 6c07c6b24b..ffb91687b8 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -91,8 +91,10 @@ #define FPSCR_RM_NEAREST (0 << 0) #define FPSCR_RM_ZERO (1 << 0) +#define DELAY_SLOT_MASK 0x7 #define DELAY_SLOT (1 << 0) #define DELAY_SLOT_CONDITIONAL (1 << 1) +#define DELAY_SLOT_RTE (1 << 2) typedef struct tlb_t { uint32_t vpn; /* virtual page number */ @@ -263,7 +265,13 @@ void cpu_load_tlb(CPUSH4State * env); #define MMU_USER_IDX 1 static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch) { - return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + /* The instruction in a RTE delay slot is fetched in privileged + mode, but executed in user mode. */ + if (ifetch && (env->flags & DELAY_SLOT_RTE)) { + return 0; + } else { + return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + } } #include "exec/cpu-all.h" @@ -380,7 +388,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 */ + *flags = (env->flags & DELAY_SLOT_MASK) /* Bits 0- 2 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/helper.c b/target/sh4/helper.c index 8f8ce81401..28d93c2543 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/log.h" +#include "sysemu/sysemu.h" #if !defined(CONFIG_USER_ONLY) #include "hw/sh4/sh_intc.h" @@ -92,7 +93,14 @@ void superh_cpu_do_interrupt(CPUState *cs) if (env->sr & (1u << SR_BL)) { if (do_exp && cs->exception_index != 0x1e0) { - cs->exception_index = 0x000; /* masked exception -> reset */ + /* In theory a masked exception generates a reset exception, + which in turn jumps to the reset vector. However this only + works when using a bootloader. When using a kernel and an + initrd, they need to be reloaded and the program counter + should be loaded with the kernel entry point. + qemu_system_reset_request takes care of that. */ + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + return; } if (do_irq && !env->in_sleep) { return; /* masked */ @@ -164,11 +172,11 @@ void superh_cpu_do_interrupt(CPUState *cs) env->sgr = env->gregs[15]; env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB); - if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (env->flags & DELAY_SLOT_MASK) { /* Branch instruction should be executed again before delay slot. */ env->spc -= 2; /* Clear flags for exception/interrupt routine. */ - env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + env->flags &= ~DELAY_SLOT_MASK; } if (do_exp) { @@ -420,7 +428,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical, if (!(env->sr & (1u << SR_MD)) && (address < 0xe0000000 || address >= 0xe4000000)) { /* Unauthorized access in user mode (only store queues are available) */ - fprintf(stderr, "Unauthorized access\n"); + qemu_log_mask(LOG_GUEST_ERROR, "Unauthorized access\n"); if (rw == 0) return MMU_DADDR_ERROR_READ; else if (rw == 1) @@ -863,8 +871,16 @@ int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { if (interrupt_request & CPU_INTERRUPT_HARD) { - superh_cpu_do_interrupt(cs); - return true; + SuperHCPU *cpu = SUPERH_CPU(cs); + CPUSH4State *env = &cpu->env; + + /* Delay slots are indivisible, ignore interrupts */ + if (env->flags & DELAY_SLOT_MASK) { + return false; + } else { + superh_cpu_do_interrupt(cs); + return true; + } } return false; } diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 0bc2f9ff19..8bc132b27b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -185,6 +185,9 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, } else if (env->flags & DELAY_SLOT_CONDITIONAL) { cpu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n", env->delayed_pc); + } else if (env->flags & DELAY_SLOT_RTE) { + cpu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n", + env->delayed_pc); } } @@ -217,8 +220,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc) if (ctx->delayed_pc != (uint32_t) -1) { tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); } - if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) - != ctx->envflags) { + if ((ctx->tbflags & DELAY_SLOT_MASK) != ctx->envflags) { tcg_gen_movi_i32(cpu_flags, ctx->envflags); } } @@ -329,7 +331,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */ #define CHECK_NOT_DELAY_SLOT \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_save_cpu_state(ctx, true); \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ ctx->bstate = BS_EXCP; \ @@ -339,7 +341,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define CHECK_PRIVILEGED \ if (IS_USER(ctx)) { \ gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ } else { \ gen_helper_raise_illegal_instruction(cpu_env); \ @@ -351,7 +353,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define CHECK_FPU_ENABLED \ if (ctx->tbflags & (1u << SR_FD)) { \ gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_helper_raise_slot_fpu_disable(cpu_env); \ } else { \ gen_helper_raise_fpu_disable(cpu_env); \ @@ -428,8 +430,9 @@ static void _decode_opc(DisasContext * ctx) CHECK_NOT_DELAY_SLOT gen_write_sr(cpu_ssr); tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc); - ctx->envflags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT_RTE; ctx->delayed_pc = (uint32_t) - 1; + ctx->bstate = BS_STOP; return; case 0x0058: /* sets */ tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S)); @@ -1784,7 +1787,7 @@ static void _decode_opc(DisasContext * ctx) fflush(stderr); #endif gen_save_cpu_state(ctx, true); - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (ctx->envflags & DELAY_SLOT_MASK) { gen_helper_raise_slot_illegal_instruction(cpu_env); } else { gen_helper_raise_illegal_instruction(cpu_env); @@ -1798,14 +1801,14 @@ static void decode_opc(DisasContext * ctx) _decode_opc(ctx); - if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (old_flags & DELAY_SLOT_MASK) { /* go out of the delay slot */ - ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + ctx->envflags &= ~DELAY_SLOT_MASK; tcg_gen_movi_i32(cpu_flags, ctx->envflags); ctx->bstate = BS_BRANCH; if (old_flags & DELAY_SLOT_CONDITIONAL) { gen_delayed_conditional_jump(ctx); - } else if (old_flags & DELAY_SLOT) { + } else { gen_jump(ctx); } @@ -1824,7 +1827,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) pc_start = tb->pc; ctx.pc = pc_start; ctx.tbflags = (uint32_t)tb->flags; - ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + ctx.envflags = tb->flags & DELAY_SLOT_MASK; ctx.bstate = BS_NONE; ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, diff --git a/tests/numa-test.c b/tests/numa-test.c index c3475d6d5e..3f636840b1 100644 --- a/tests/numa-test.c +++ b/tests/numa-test.c @@ -92,7 +92,7 @@ static QList *get_cpus(QDict **resp) *resp = qmp("{ 'execute': 'query-cpus' }"); g_assert(*resp); g_assert(qdict_haskey(*resp, "return")); - return qdict_get_qlist(*resp, "return"); + return qdict_get_qlist(*resp, "return"); } static void test_query_cpus(const void *data) @@ -100,7 +100,7 @@ static void test_query_cpus(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7"); qtest_start(cli); @@ -124,6 +124,7 @@ static void test_query_cpus(const void *data) } else { g_assert_cmpint(node, ==, 1); } + qobject_decref(e); } QDECREF(resp); @@ -136,7 +137,7 @@ static void pc_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -176,6 +177,7 @@ static void pc_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); @@ -188,7 +190,7 @@ static void spapr_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 4,cores=4 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -220,6 +222,7 @@ static void spapr_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); @@ -232,7 +235,7 @@ static void aarch64_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 2 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -262,6 +265,7 @@ static void aarch64_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 4d9189e9ef..7e28c161b2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -207,53 +207,6 @@ int qemu_pipe(int pipefd[2]) return ret; } -int qemu_utimens(const char *path, const struct timespec *times) -{ - struct timeval tv[2], tv_now; - struct stat st; - int i; -#ifdef CONFIG_UTIMENSAT - int ret; - - ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); - if (ret != -1 || errno != ENOSYS) { - return ret; - } -#endif - /* Fallback: use utimes() instead of utimensat() */ - - /* happy if special cases */ - if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { - return 0; - } - if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { - return utimes(path, NULL); - } - - /* prepare for hard cases */ - if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { - gettimeofday(&tv_now, NULL); - } - if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { - stat(path, &st); - } - - for (i = 0; i < 2; i++) { - if (times[i].tv_nsec == UTIME_NOW) { - tv[i].tv_sec = tv_now.tv_sec; - tv[i].tv_usec = tv_now.tv_usec; - } else if (times[i].tv_nsec == UTIME_OMIT) { - tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; - tv[i].tv_usec = 0; - } else { - tv[i].tv_sec = times[i].tv_sec; - tv[i].tv_usec = times[i].tv_nsec / 1000; - } - } - - return utimes(path, &tv[0]); -} - char * qemu_get_local_state_pathname(const char *relative_pathname) { |