diff options
48 files changed, 1456 insertions, 722 deletions
diff --git a/.shippable.yml b/.shippable.yml index 1a1fd7a91d..653bd750fe 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -5,6 +5,8 @@ env: TARGET_LIST=arm-softmmu,arm-linux-user - IMAGE=debian-arm64-cross TARGET_LIST=aarch64-softmmu,aarch64-linux-user + - IMAGE=debian-s390x-cross + TARGET_LIST=s390x-softmmu,s390x-linux-user build: pre_ci: - make docker-image-${IMAGE} diff --git a/CODING_STYLE b/CODING_STYLE index f53180bf3f..2fa0c0b65b 100644 --- a/CODING_STYLE +++ b/CODING_STYLE @@ -116,3 +116,10 @@ if (a == 1) { Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read. Besides, good compilers already warn users when '==' is mis-typed as '=', even when the constant is on the right. + +7. Comment style + +We use traditional C-style /* */ comments and avoid // comments. + +Rationale: The // form is valid in C99, so this is purely a matter of +consistency of style. The checkpatch script will warn you about this. diff --git a/blockdev.c b/blockdev.c index 2b2f6ceef0..8682bd81d8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -52,6 +52,7 @@ #include "sysemu/arch_init.h" #include "qemu/cutils.h" #include "qemu/help_option.h" +#include "qemu/throttle-options.h" static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); @@ -4007,83 +4008,11 @@ QemuOptsList qemu_common_drive_opts = { .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, .help = "open drive file as read-only", - },{ - .name = "throttling.iops-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total I/O operations per second", - },{ - .name = "throttling.iops-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read operations per second", - },{ - .name = "throttling.iops-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write operations per second", - },{ - .name = "throttling.bps-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total bytes per second", - },{ - .name = "throttling.bps-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read bytes per second", - },{ - .name = "throttling.bps-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write bytes per second", - },{ - .name = "throttling.iops-total-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations burst", - },{ - .name = "throttling.iops-read-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations read burst", - },{ - .name = "throttling.iops-write-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations write burst", - },{ - .name = "throttling.bps-total-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes burst", - },{ - .name = "throttling.bps-read-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes read burst", - },{ - .name = "throttling.bps-write-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes write burst", - },{ - .name = "throttling.iops-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-total-max burst period, in seconds", - },{ - .name = "throttling.iops-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-read-max burst period, in seconds", - },{ - .name = "throttling.iops-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-write-max burst period, in seconds", - },{ - .name = "throttling.bps-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-total-max burst period, in seconds", - },{ - .name = "throttling.bps-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-read-max burst period, in seconds", - },{ - .name = "throttling.bps-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-write-max burst period, in seconds", - },{ - .name = "throttling.iops-size", - .type = QEMU_OPT_NUMBER, - .help = "when limiting by iops max size of an I/O in bytes", - },{ + }, + + THROTTLE_OPTS, + + { .name = "throttling.group", .type = QEMU_OPT_STRING, .help = "name of the block throttling group", diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 485a006aa7..7af14e29aa 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -7492,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status) { signed char current_rounding_mode = status->float_rounding_mode; set_float_rounding_mode(float_round_to_zero, status); - int64_t v = float64_to_uint64(a, status); + uint64_t v = float64_to_uint64(a, status); set_float_rounding_mode(current_rounding_mode, status); return v; } diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs index 1b120a4a7d..659df6e187 100644 --- a/fsdev/Makefile.objs +++ b/fsdev/Makefile.objs @@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o else common-obj-y = qemu-fsdev-dummy.o endif -common-obj-y += qemu-fsdev-opts.o +common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o # Toplevel always builds this; targets without virtio will put it in # common-obj-y diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index a56dc8488d..0844a403dc 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -17,6 +17,7 @@ #include <dirent.h> #include <utime.h> #include <sys/vfs.h> +#include "qemu-fsdev-throttle.h" #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 @@ -74,6 +75,7 @@ typedef struct FsDriverEntry { char *path; int export_flags; FileOperations *ops; + FsThrottle fst; } FsDriverEntry; typedef struct FsContext @@ -83,6 +85,7 @@ typedef struct FsContext int export_flags; struct xattr_operations **xops; struct extended_ops exops; + FsThrottle *fst; /* fs driver specific data */ void *private; } FsContext; diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index 1dd8c7a24c..bf5713008a 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -9,6 +9,7 @@ #include "qemu/config-file.h" #include "qemu/option.h" #include "qemu/module.h" +#include "qemu/throttle-options.h" static QemuOptsList qemu_fsdev_opts = { .name = "fsdev", @@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = { .type = QEMU_OPT_NUMBER, }, + THROTTLE_OPTS, + { /*End of list */ } }, }; diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c new file mode 100644 index 0000000000..7ae4e86646 --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.c @@ -0,0 +1,118 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu-fsdev-throttle.h" +#include "qemu/iov.h" + +static void fsdev_throttle_read_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[false]); +} + +static void fsdev_throttle_write_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[true]); +} + +void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp) +{ + throttle_config_init(&fst->cfg); + fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.bps-total", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].avg = + qemu_opt_get_number(opts, "throttling.bps-read", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.bps-write", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.iops-total", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].avg = + qemu_opt_get_number(opts, "throttling.iops-read", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.iops-write", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.bps-total-max", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].max = + qemu_opt_get_number(opts, "throttling.bps-read-max", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.bps-write-max", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.iops-total-max", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].max = + qemu_opt_get_number(opts, "throttling.iops-read-max", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.iops-write-max", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1); + fst->cfg.op_size = + qemu_opt_get_number(opts, "throttling.iops-size", 0); + + throttle_is_valid(&fst->cfg, errp); +} + +void fsdev_throttle_init(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_init(&fst->ts); + throttle_timers_init(&fst->tt, + qemu_get_aio_context(), + QEMU_CLOCK_REALTIME, + fsdev_throttle_read_timer_cb, + fsdev_throttle_write_timer_cb, + fst); + throttle_config(&fst->ts, &fst->tt, &fst->cfg); + qemu_co_queue_init(&fst->throttled_reqs[0]); + qemu_co_queue_init(&fst->throttled_reqs[1]); + } +} + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, + struct iovec *iov, int iovcnt) +{ + if (throttle_enabled(&fst->cfg)) { + if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) || + !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { + qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); + } + + throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt)); + + if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && + !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) { + qemu_co_queue_next(&fst->throttled_reqs[is_write]); + } + } +} + +void fsdev_throttle_cleanup(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_timers_destroy(&fst->tt); + } +} diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h new file mode 100644 index 0000000000..e418643ccb --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.h @@ -0,0 +1,39 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ + +#ifndef _FSDEV_THROTTLE_H +#define _FSDEV_THROTTLE_H + +#include "block/aio.h" +#include "qemu/main-loop.h" +#include "qemu/coroutine.h" +#include "qapi/error.h" +#include "qemu/throttle.h" + +typedef struct FsThrottle { + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig cfg; + CoQueue throttled_reqs[2]; +} FsThrottle; + +void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **); + +void fsdev_throttle_init(FsThrottle *); + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , + struct iovec *, int); + +void fsdev_throttle_cleanup(FsThrottle *); +#endif /* _FSDEV_THROTTLE_H */ diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 7de07e1ba6..f22a3c3654 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -13,7 +13,9 @@ #include "qemu/osdep.h" #include "9p.h" +#include "9p-local.h" #include "9p-xattr.h" +#include "9p-util.h" #include "fsdev/qemu-fsdev.h" /* local_ops */ #include <arpa/inet.h> #include <pwd.h> @@ -43,40 +45,62 @@ #define BTRFS_SUPER_MAGIC 0x9123683E #endif -#define VIRTFS_META_DIR ".virtfs_metadata" +typedef struct { + int mountfd; +} LocalData; -static char *local_mapped_attr_path(FsContext *ctx, const char *path) +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode) { - int dirlen; - const char *name = strrchr(path, '/'); - if (name) { - dirlen = name - path; - ++name; - } else { - name = path; - dirlen = 0; + LocalData *data = fs_ctx->private; + + /* All paths are relative to the path data->mountfd points to */ + while (*path == '/') { + path++; } - return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root, - dirlen, path, VIRTFS_META_DIR, name); + + return relative_openat_nofollow(data->mountfd, path, flags, mode); +} + +int local_opendir_nofollow(FsContext *fs_ctx, const char *path) +{ + return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0); +} + +static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd, + const char *npath) +{ + int serrno = errno; + renameat(odirfd, opath, ndirfd, npath); + errno = serrno; } -static FILE *local_fopen(const char *path, const char *mode) +static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) +{ + int serrno = errno; + unlinkat(dirfd, path, flags); + errno = serrno; +} + +#define VIRTFS_META_DIR ".virtfs_metadata" + +static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { int fd, o_mode = 0; FILE *fp; - int flags = O_NOFOLLOW; + int flags; /* * only supports two modes */ if (mode[0] == 'r') { - flags |= O_RDONLY; + flags = O_RDONLY; } else if (mode[0] == 'w') { - flags |= O_WRONLY | O_TRUNC | O_CREAT; + flags = O_WRONLY | O_TRUNC | O_CREAT; o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; } else { return NULL; } - fd = open(path, flags, o_mode); + fd = openat_file(dirfd, name, flags, o_mode); if (fd == -1) { return NULL; } @@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode) } #define ATTR_MAX 100 -static void local_mapped_file_attr(FsContext *ctx, const char *path, +static void local_mapped_file_attr(int dirfd, const char *name, struct stat *stbuf) { FILE *fp; char buf[ATTR_MAX]; - char *attr_path; + int map_dirfd; - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); - g_free(attr_path); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } + + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); if (!fp) { return; } @@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path, static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { - int err; - char *buffer; - char *path = fs_path->data; + int err = -1; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; - buffer = rpath(fs_ctx, path); - err = lstat(buffer, stbuf); + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW); if (err) { goto err_out; } @@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) gid_t tmp_gid; mode_t tmp_mode; dev_t tmp_dev; - if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (getxattr(buffer, "user.virtfs.mode", - &tmp_mode, sizeof(mode_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev, + sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - local_mapped_file_attr(fs_ctx, path, stbuf); + local_mapped_file_attr(dirfd, name, stbuf); } err_out: - g_free(buffer); - return err; -} - -static int local_create_mapped_attr_dir(FsContext *ctx, const char *path) -{ - int err; - char *attr_dir; - char *tmp_path = g_strdup(path); - - attr_dir = g_strdup_printf("%s/%s/%s", - ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR); - - err = mkdir(attr_dir, 0700); - if (err < 0 && errno == EEXIST) { - err = 0; - } - g_free(attr_dir); - g_free(tmp_path); + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } -static int local_set_mapped_file_attr(FsContext *ctx, - const char *path, FsCred *credp) +static int local_set_mapped_file_attrat(int dirfd, const char *name, + FsCred *credp) { FILE *fp; - int ret = 0; + int ret; char buf[ATTR_MAX]; - char *attr_path; int uid = -1, gid = -1, mode = -1, rdev = -1; + int map_dirfd; + + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return -1; + } + + fp = local_fopenat(map_dirfd, name, "r"); if (!fp) { - goto create_map_file; + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { if (!strncmp(buf, "virtfs.uid", 10)) { - uid = atoi(buf+11); + uid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.gid", 10)) { - gid = atoi(buf+11); + gid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.mode", 11)) { - mode = atoi(buf+12); + mode = atoi(buf + 12); } else if (!strncmp(buf, "virtfs.rdev", 11)) { - rdev = atoi(buf+12); + rdev = atoi(buf + 12); } memset(buf, 0, ATTR_MAX); } fclose(fp); - goto update_map_file; - -create_map_file: - ret = local_create_mapped_attr_dir(ctx, path); - if (ret < 0) { - goto err_out; - } update_map_file: - fp = local_fopen(attr_path, "w"); + fp = local_fopenat(map_dirfd, name, "w"); + close_preserve_errno(map_dirfd); if (!fp) { - ret = -1; - goto err_out; + return -1; } if (credp->fc_uid != -1) { @@ -230,7 +259,6 @@ update_map_file: rdev = credp->fc_rdev; } - if (uid != -1) { fprintf(fp, "virtfs.uid=%d\n", uid); } @@ -245,39 +273,71 @@ update_map_file: } fclose(fp); -err_out: - g_free(attr_path); + return 0; +} + +static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) +{ + int fd, ret; + + /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). + * Unfortunately, the linux kernel doesn't implement it yet. As an + * alternative, let's open the file and use fchmod() instead. This + * may fail depending on the permissions of the file, but it is the + * best we can do to avoid TOCTTOU. We first try to open read-only + * in case name points to a directory. If that fails, we try write-only + * in case name doesn't point to a directory. + */ + fd = openat_file(dirfd, name, O_RDONLY, 0); + if (fd == -1) { + /* In case the file is writable-only and isn't a directory. */ + if (errno == EACCES) { + fd = openat_file(dirfd, name, O_WRONLY, 0); + } + if (fd == -1 && errno == EISDIR) { + errno = EACCES; + } + } + if (fd == -1) { + return -1; + } + ret = fchmod(fd, mode); + close_preserve_errno(fd); return ret; } -static int local_set_xattr(const char *path, FsCred *credp) +static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) { int err; if (credp->fc_uid != -1) { uint32_t tmp_uid = cpu_to_le32(credp->fc_uid); - err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t), 0); if (err) { return err; } } if (credp->fc_gid != -1) { uint32_t tmp_gid = cpu_to_le32(credp->fc_gid); - err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t), 0); if (err) { return err; } } if (credp->fc_mode != -1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); - err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t), 0); if (err) { return err; } } if (credp->fc_rdev != -1) { uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev); - err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev, + sizeof(dev_t), 0); if (err) { return err; } @@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, - FsCred *credp) +static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd, + const char *name, FsCred *credp) { - char *buffer; - - buffer = rpath(fs_ctx, path); - if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) { + if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - goto err; + return -1; } } - if (chmod(buffer, credp->fc_mode & 07777) < 0) { - goto err; - } - - g_free(buffer); - return 0; -err: - g_free(buffer); - return -1; + return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777); } static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { ssize_t tsize = -1; - char *buffer; - char *path = fs_path->data; if ((fs_ctx->export_flags & V9FS_SM_MAPPED) || (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) { int fd; - buffer = rpath(fs_ctx, path); - fd = open(buffer, O_RDONLY | O_NOFOLLOW); - g_free(buffer); + + fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0); if (fd == -1) { return -1; } do { tsize = read(fd, (void *)buf, bufsz); } while (tsize == -1 && errno == EINTR); - close(fd); + close_preserve_errno(fd); } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - tsize = readlink(buffer, buf, bufsz); - g_free(buffer); + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + tsize = readlinkat(dirfd, name, buf, bufsz); + close_preserve_errno(dirfd); + out: + g_free(name); + g_free(dirpath); } return tsize; } @@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int local_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int fd; - buffer = rpath(ctx, path); - fs->fd = open(buffer, flags | O_NOFOLLOW); - g_free(buffer); + fd = local_open_nofollow(ctx, fs_path->data, flags, 0); + if (fd == -1) { + return -1; + } + fs->fd = fd; return fs->fd; } static int local_opendir(FsContext *ctx, V9fsPath *fs_path, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int dirfd; + DIR *stream; + + dirfd = local_opendir_nofollow(ctx, fs_path->data); + if (dirfd == -1) { + return -1; + } - buffer = rpath(ctx, path); - fs->dir.stream = opendir(buffer); - g_free(buffer); - if (!fs->dir.stream) { + stream = fdopendir(dirfd); + if (!stream) { return -1; } + fs->dir.stream = stream; return 0; } @@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = chmod(buffer, credp->fc_mode); - g_free(buffer); + ret = local_set_mapped_file_attrat(dirfd, name, credp); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + ret = fchmodat_nofollow(dirfd, name, credp->fc_mode); } + close_preserve_errno(dirfd); + +out: + g_free(dirpath); + g_free(name); return ret; } static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); if (err == -1) { goto out; } - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); - if (err == -1) { - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, credp->fc_mode, credp->fc_rdev); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); if (err == -1) { goto out; } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); - if (err == -1) { - goto out; + credp->fc_mode = credp->fc_mode | S_IFDIR; + + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, credp->fc_mode); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mkdirat(dirfd, name, credp->fc_mode); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int flags, FsCred *credp, V9fsFidOpenState *fs) { - char *path; int fd = -1; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; /* * Mark all the open to not follow symlinks */ flags |= O_NOFOLLOW; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set cleint credentials in xattr */ - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + /* Set cleint credentials in xattr */ + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set client credentials in .virtfs_metadata directory files */ - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, credp->fc_mode); + fd = openat_file(dirfd, name, flags, credp->fc_mode); if (fd == -1) { - err = fd; goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } @@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, goto out; err_end: - close(fd); - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, + flags & O_DIRECTORY ? AT_REMOVEDIR : 0); + close_preserve_errno(fd); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, V9fsPath *dir_path, const char *name, FsCred *credp) { int err = -1; - int serrno = 0; - char *newpath; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - newpath = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { int fd; ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); + + fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, + SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } /* Write the oldpath (target) to the file. */ @@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, do { write_size = write(fd, (void *)oldpath, oldpath_size); } while (write_size == -1 && errno == EINTR); + close_preserve_errno(fd); if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; goto err_end; } - close(fd); /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - int fd; - ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - /* Write the oldpath (target) to the file. */ - oldpath_size = strlen(oldpath); - do { - write_size = write(fd, (void *)oldpath, oldpath_size); - } while (write_size == -1 && errno == EINTR); + credp->fc_mode = credp->fc_mode | S_IFLNK; - if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - close(fd); - /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_mapped_file_attr(fs_ctx, newpath, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, newpath); - err = symlink(oldpath, buffer); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = symlinkat(oldpath, dirfd, name); if (err) { goto out; } - err = lchown(buffer, credp->fc_uid, credp->fc_gid); + err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); if (err == -1) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - serrno = errno; goto err_end; - } else + } else { err = 0; + } } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_link(FsContext *ctx, V9fsPath *oldpath, V9fsPath *dirpath, const char *name) { - int ret; - V9fsString newpath; - char *buffer, *buffer1; + char *odirpath = g_path_get_dirname(oldpath->data); + char *oname = g_path_get_basename(oldpath->data); + int ret = -1; + int odirfd, ndirfd; - v9fs_string_init(&newpath); - v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); + odirfd = local_opendir_nofollow(ctx, odirpath); + if (odirfd == -1) { + goto out; + } + + ndirfd = local_opendir_nofollow(ctx, dirpath->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + goto out; + } - buffer = rpath(ctx, oldpath->data); - buffer1 = rpath(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + ret = linkat(odirfd, oname, ndirfd, name, 0); + if (ret < 0) { + goto out_close; + } /* now link the virtfs_metadata files */ - if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) { - /* Link the .virtfs_metadata files. Create the metada directory */ - ret = local_create_mapped_attr_dir(ctx, newpath.data); - if (ret < 0) { - goto err_out; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_link; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; } - buffer = local_mapped_attr_path(ctx, oldpath->data); - buffer1 = local_mapped_attr_path(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + + ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + goto err_undo_link; } - } -err_out: - v9fs_string_free(&newpath); - return ret; -} -static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) -{ - char *buffer; - int ret; - char *path = fs_path->data; + ret = 0; + } + goto out_close; - buffer = rpath(ctx, path); - ret = truncate(buffer, size); - g_free(buffer); +err: + ret = -1; +err_undo_link: + unlinkat_preserve_errno(ndirfd, name, 0); +out_close: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); +out: + g_free(oname); + g_free(odirpath); return ret; } -static int local_rename(FsContext *ctx, const char *oldpath, - const char *newpath) +static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { - int err; - char *buffer, *buffer1; + int fd, ret; - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = local_create_mapped_attr_dir(ctx, newpath); - if (err < 0) { - return err; - } - /* rename the .virtfs_metadata files */ - buffer = local_mapped_attr_path(ctx, oldpath); - buffer1 = local_mapped_attr_path(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - if (err < 0 && errno != ENOENT) { - return err; - } + fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0); + if (fd == -1) { + return -1; } - - buffer = rpath(ctx, oldpath); - buffer1 = rpath(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - return err; + ret = ftruncate(fd, size); + close_preserve_errno(fd); + return ret; } static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if ((credp->fc_uid == -1 && credp->fc_gid == -1) || (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = lchown(buffer, credp->fc_uid, credp->fc_gid); - g_free(buffer); + ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); + ret = local_set_mapped_file_attrat(dirfd, name, credp); } + + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return ret; } static int local_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { - char *buffer; - int ret; - char *path = fs_path->data; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd, ret = -1; + + dirfd = local_opendir_nofollow(s, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(s, path); - ret = qemu_utimens(buffer, buf); - g_free(buffer); + ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(name); return ret; } -static int local_remove(FsContext *ctx, const char *path) +static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, + int flags) { - int err; - struct stat stbuf; - char *buffer; + int ret = -1; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(ctx, path); - err = lstat(buffer, &stbuf); - g_free(buffer); - if (err) { - goto err_out; - } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - if (S_ISDIR(stbuf.st_mode)) { - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - path, VIRTFS_META_DIR); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + int map_dirfd; + + if (flags == AT_REMOVEDIR) { + int fd; + + fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH); + if (fd == -1) { + goto err_out; + } + /* + * If directory remove .virtfs_metadata contained in the + * directory + */ + ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); + close_preserve_errno(fd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path) } /* * Now remove the name from parent directory - * .virtfs_metadata directory + * .virtfs_metadata directory. */ - buffer = local_mapped_attr_path(ctx, path); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path) } } - buffer = rpath(ctx, path); - err = remove(buffer); - g_free(buffer); + ret = unlinkat(dirfd, name, flags); +err_out: + return ret; +} + +static int local_remove(FsContext *ctx, const char *path) +{ + struct stat stbuf; + char *dirpath = g_path_get_dirname(path); + char *name = g_path_get_basename(path); + int flags = 0; + int dirfd; + int err = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd) { + goto out; + } + + if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + goto err_out; + } + + if (S_ISDIR(stbuf.st_mode)) { + flags |= AT_REMOVEDIR; + } + + err = local_unlinkat_common(ctx, dirfd, name, flags); err_out: + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } @@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type, static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { - char *buffer; - int ret; - char *path = fs_path->data; + int fd, ret; - buffer = rpath(s, path); - ret = statfs(buffer, stbuf); - g_free(buffer); + fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0); + ret = fstatfs(fd, stbuf); + close_preserve_errno(fd); return ret; } @@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, const char *new_name) { int ret; - V9fsString old_full_name, new_full_name; + int odirfd, ndirfd; + + odirfd = local_opendir_nofollow(ctx, olddir->data); + if (odirfd == -1) { + return -1; + } + + ndirfd = local_opendir_nofollow(ctx, newdir->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + return -1; + } + + ret = renameat(odirfd, old_name, ndirfd, new_name); + if (ret < 0) { + goto out; + } + + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; - v9fs_string_init(&old_full_name); - v9fs_string_init(&new_full_name); + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_rename; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } - v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name); - v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name); + /* rename the .virtfs_metadata files */ + ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_undo_rename; + } - ret = local_rename(ctx, old_full_name.data, new_full_name.data); - v9fs_string_free(&old_full_name); - v9fs_string_free(&new_full_name); + ret = 0; + } + goto out; + +err: + ret = -1; +err_undo_rename: + renameat_preserve_errno(ndirfd, new_name, odirfd, old_name); +out: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); return ret; } +static void v9fs_path_init_dirname(V9fsPath *path, const char *str) +{ + path->data = g_path_get_dirname(str); + path->size = strlen(path->data) + 1; +} + +static int local_rename(FsContext *ctx, const char *oldpath, + const char *newpath) +{ + int err; + char *oname = g_path_get_basename(oldpath); + char *nname = g_path_get_basename(newpath); + V9fsPath olddir, newdir; + + v9fs_path_init_dirname(&olddir, oldpath); + v9fs_path_init_dirname(&newdir, newpath); + + err = local_renameat(ctx, &olddir, oname, &newdir, nname); + + v9fs_path_free(&newdir); + v9fs_path_free(&olddir); + g_free(nname); + g_free(oname); + + return err; +} + static int local_unlinkat(FsContext *ctx, V9fsPath *dir, const char *name, int flags) { int ret; - V9fsString fullname; - char *buffer; + int dirfd; - v9fs_string_init(&fullname); - - v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name); - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (flags == AT_REMOVEDIR) { - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - fullname.data, VIRTFS_META_DIR); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } - } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ - buffer = local_mapped_attr_path(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } + dirfd = local_opendir_nofollow(ctx, dir->data); + if (dirfd == -1) { + return -1; } - /* Remove the name finally */ - buffer = rpath(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); -err_out: - v9fs_string_free(&fullname); + ret = local_unlinkat_common(ctx, dirfd, name, flags); + close_preserve_errno(dirfd); return ret; } @@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, static int local_init(FsContext *ctx) { - int err = 0; struct statfs stbuf; + LocalData *data = g_malloc(sizeof(*data)); + + data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); + if (data->mountfd == -1) { + goto err; + } + +#ifdef FS_IOC_GETVERSION + /* + * use ioc_getversion only if the ioctl is definied + */ + if (fstatfs(data->mountfd, &stbuf) < 0) { + close_preserve_errno(data->mountfd); + goto err; + } + switch (stbuf.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + ctx->exops.get_st_gen = local_ioc_getversion; + break; + } +#endif if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; @@ -1185,29 +1277,28 @@ static int local_init(FsContext *ctx) ctx->xops = passthrough_xattr_ops; } ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT; -#ifdef FS_IOC_GETVERSION - /* - * use ioc_getversion only if the iocl is definied - */ - err = statfs(ctx->fs_root, &stbuf); - if (!err) { - switch (stbuf.f_type) { - case EXT2_SUPER_MAGIC: - case BTRFS_SUPER_MAGIC: - case REISERFS_SUPER_MAGIC: - case XFS_SUPER_MAGIC: - ctx->exops.get_st_gen = local_ioc_getversion; - break; - } - } -#endif - return err; + + ctx->private = data; + return 0; + +err: + g_free(data); + return -1; +} + +static void local_cleanup(FsContext *ctx) +{ + LocalData *data = ctx->private; + + close(data->mountfd); + g_free(data); } static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) { const char *sec_model = qemu_opt_get(opts, "security_model"); const char *path = qemu_opt_get(opts, "path"); + Error *err = NULL; if (!sec_model) { error_report("Security model not specified, local fs needs security model"); @@ -1236,6 +1327,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) error_report("fsdev: No path specified"); return -1; } + + fsdev_throttle_parse_opts(opts, &fse->fst, &err); + if (err) { + error_reportf_err(err, "Throttle configuration is not valid: "); + return -1; + } + fse->path = g_strdup(path); return 0; @@ -1244,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations local_ops = { .parse_opts = local_parse_opts, .init = local_init, + .cleanup = local_cleanup, .lstat = local_lstat, .readlink = local_readlink, .close = local_close, diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h new file mode 100644 index 0000000000..32c72749d9 --- /dev/null +++ b/hw/9pfs/9p-local.h @@ -0,0 +1,20 @@ +/* + * 9p local backend utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_LOCAL_H +#define QEMU_9P_LOCAL_H + +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode); +int local_opendir_nofollow(FsContext *fs_ctx, const char *path); + +#endif diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index ec003181cd..bbf89064f7 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -25,13 +25,7 @@ static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size); } static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, @@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size, + flags); } static int mp_pacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_ACCESS); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size); } static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, @@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size, + flags); } static int mp_dacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_DEFAULT); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c new file mode 100644 index 0000000000..fdb4d57376 --- /dev/null +++ b/hw/9pfs/9p-util.c @@ -0,0 +1,69 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode) +{ + int fd; + + fd = dup(dirfd); + if (fd == -1) { + return -1; + } + + while (*path) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(path[0] != '/'); + + head = g_strdup(path); + c = strchr(path, '/'); + if (c) { + head[c - path] = 0; + next_fd = openat_dir(fd, head); + } else { + next_fd = openat_file(fd, head, flags, mode); + } + g_free(head); + if (next_fd == -1) { + close_preserve_errno(fd); + return -1; + } + close(fd); + fd = next_fd; + + if (!c) { + break; + } + path = c + 1; + } + + return fd; +} + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lgetxattr(proc_path, name, value, size); + g_free(proc_path); + return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h new file mode 100644 index 0000000000..091f3ce88e --- /dev/null +++ b/hw/9pfs/9p-util.h @@ -0,0 +1,54 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_UTIL_H +#define QEMU_9P_UTIL_H + +static inline void close_preserve_errno(int fd) +{ + int serrno = errno; + close(fd); + errno = serrno; +} + +static inline int openat_dir(int dirfd, const char *name) +{ + return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH); +} + +static inline int openat_file(int dirfd, const char *name, int flags, + mode_t mode) +{ + int fd, serrno, ret; + + fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, + mode); + if (fd == -1) { + return -1; + } + + serrno = errno; + /* O_NONBLOCK was only needed to open the file. Let's drop it. */ + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + errno = serrno; + return fd; +} + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode); +ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size); +int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size, int flags); + +#endif diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index f87530c8b5..2c90817b75 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -20,9 +20,6 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, errno = ENOATTR; return -1; } - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, name, value, size); } static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, @@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, name, value, size, flags); } static int mp_user_removexattr(FsContext *ctx, const char *path, const char *name) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, name); - g_free(buffer); - return ret; + return local_removexattr_nofollow(ctx, path, name); } XattrOperations mapped_user_xattr = { diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index 5d8595ed93..eec160b3c2 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -15,6 +15,8 @@ #include "9p.h" #include "fsdev/file-op-9p.h" #include "9p-xattr.h" +#include "9p-util.h" +#include "9p-local.h" static XattrOperations *get_xattr_operations(XattrOperations **h, @@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } +static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = llistxattr(proc_path, list, size); + g_free(proc_path); + return ret; +} /* * Get the list and pass to each layer to find out whether @@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, size_t vsize) { ssize_t size = 0; - char *buffer; void *ovalue = value; XattrOperations *xops; char *orig_value, *orig_value_start; ssize_t xattr_len, parsed_len = 0, attr_len; + char *dirpath, *name; + int dirfd; /* Get the actual len */ - buffer = rpath(ctx, path); - xattr_len = llistxattr(buffer, value, 0); + dirpath = g_path_get_dirname(path); + dirfd = local_opendir_nofollow(ctx, dirpath); + g_free(dirpath); + if (dirfd == -1) { + return -1; + } + + name = g_path_get_basename(path); + xattr_len = flistxattrat_nofollow(dirfd, name, value, 0); if (xattr_len <= 0) { - g_free(buffer); + g_free(name); + close_preserve_errno(dirfd); return xattr_len; } /* Now fetch the xattr and find the actual size */ orig_value = g_malloc(xattr_len); - xattr_len = llistxattr(buffer, orig_value, xattr_len); - g_free(buffer); + xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len); + g_free(name); + close_preserve_errno(dirfd); + if (xattr_len < 0) { + return -1; + } /* store the orig pointer */ orig_value_start = orig_value; @@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx, } +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fgetxattrat_nofollow(dirfd, filename, name, value, size); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + return local_getxattr_nofollow(ctx, path, name, value, size); +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lsetxattr(proc_path, name, value, size, flags); + g_free(proc_path); + return ret; +} + +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags) +{ + return local_setxattr_nofollow(ctx, path, name, value, size, flags); +} + +static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lremovexattr(proc_path, name); + g_free(proc_path); + return ret; +} + +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fremovexattrat_nofollow(dirfd, filename, name); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_removexattr(FsContext *ctx, const char *path, const char *name) +{ + return local_removexattr_nofollow(ctx, path, name); +} + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags) +{ + errno = ENOTSUP; + return -1; +} + +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size) +{ + return 0; +} + +int notsup_removexattr(FsContext *ctx, const char *path, const char *name) +{ + errno = ENOTSUP; + return -1; +} + XattrOperations *mapped_xattr_ops[] = { &mapped_user_xattr, &mapped_pacl_xattr, diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h index a853ea641c..0d83996575 100644 --- a/hw/9pfs/9p-xattr.h +++ b/hw/9pfs/9p-xattr.h @@ -29,6 +29,13 @@ typedef struct xattr_operations const char *path, const char *name); } XattrOperations; +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size); +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags); +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name); extern XattrOperations mapped_user_xattr; extern XattrOperations passthrough_user_xattr; @@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags); int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name); + ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value, size_t size); - -static inline ssize_t pt_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, size_t size) -{ - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; -} - -static inline int pt_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; -} - -static inline int pt_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lremovexattr(path, name); - g_free(buffer); - return ret; -} - -static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static inline int notsup_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - errno = ENOTSUP; - return -1; -} - -static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path, - char *name, void *value, size_t size) -{ - return 0; -} - -static inline int notsup_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - errno = ENOTSUP; - return -1; -} +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags); +int pt_removexattr(FsContext *ctx, const char *path, const char *name); + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags); +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size); +int notsup_removexattr(FsContext *ctx, const char *path, const char *name); #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 3af1c93dc8..76c9247c77 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3010,7 +3010,6 @@ out_nofid: */ static void coroutine_fn v9fs_lock(void *opaque) { - int8_t status; V9fsFlock flock; size_t offset = 7; struct stat stbuf; @@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque) int32_t fid, err = 0; V9fsPDU *pdu = opaque; - status = P9_LOCK_ERROR; v9fs_string_init(&flock.client_id); err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type, &flock.flags, &flock.start, &flock.length, @@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque) if (err < 0) { goto out; } - status = P9_LOCK_SUCCESS; + err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS); + if (err < 0) { + goto out; + } + err += offset; + trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS); out: put_fid(pdu, fidp); out_nofid: - err = pdu_marshal(pdu, offset, "b", status); - if (err > 0) { - err += offset; - } - trace_v9fs_lock_return(pdu->tag, pdu->id, status); pdu_complete(pdu, err); v9fs_string_free(&flock.client_id); } @@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) error_setg(errp, "share path %s is not a directory", fse->path); goto out; } + + s->ctx.fst = &fse->fst; + fsdev_throttle_init(s->ctx.fst); + v9fs_path_free(&path); rc = 0; @@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) if (s->ops->cleanup) { s->ops->cleanup(&s->ctx); } + fsdev_throttle_cleanup(s->ctx.fst); g_free(s->tag); g_free(s->ctx.fs_root); } diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index da0ae0cfdb..32197e6671 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = 9p.o +common-obj-y = 9p.o 9p-util.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 120e267108..88791bc327 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset); @@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset); diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c index 8ce7daf23a..b4adac88cd 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/tco.c @@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr) static inline void tco_timer_stop(TCOIORegs *tr) { tr->expire_time = -1; + timer_del(tr->tco_timer); } static void tco_timer_expired(void *opaque) diff --git a/hw/core/loader.c b/hw/core/loader.c index 8b980e91fb..bf17b42cbe 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -435,6 +435,19 @@ int load_elf_as(const char *filename, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) { + return load_elf_ram(filename, translate_fn, translate_opaque, + pentry, lowaddr, highaddr, big_endian, elf_machine, + clear_lsb, data_swab, as, true); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom) +{ int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -473,11 +486,11 @@ int load_elf_as(const char *filename, if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } fail: diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c index 1ac090d1a4..1485d5b285 100644 --- a/hw/core/or-irq.c +++ b/hw/core/or-irq.c @@ -89,6 +89,9 @@ static void or_irq_class_init(ObjectClass *klass, void *data) dc->props = or_irq_properties; dc->realize = or_irq_realize; dc->vmsd = &vmstate_or_irq; + + /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */ + dc->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo or_irq_type_info = { diff --git a/hw/core/register.c b/hw/core/register.c index 4bfbc508de..dc335a79a9 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -59,6 +59,15 @@ static inline uint64_t register_read_val(RegisterInfo *reg) return 0; /* unreachable */ } +static inline uint64_t register_enabled_mask(int data_size, unsigned size) +{ + if (data_size < size) { + size = data_size; + } + + return MAKE_64BIT_MASK(0, size * 8); +} + void register_write(RegisterInfo *reg, uint64_t val, uint64_t we, const char *prefix, bool debug) { @@ -192,11 +201,7 @@ void register_write_memory(void *opaque, hwaddr addr, } /* Generate appropriate write enable mask */ - if (reg->data_size < size) { - we = MAKE_64BIT_MASK(0, reg->data_size * 8); - } else { - we = MAKE_64BIT_MASK(0, size * 8); - } + we = register_enabled_mask(reg->data_size, size); register_write(reg, value, we, reg_array->prefix, reg_array->debug); @@ -208,6 +213,7 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, RegisterInfoArray *reg_array = opaque; RegisterInfo *reg = NULL; uint64_t read_val; + uint64_t re; int i; for (i = 0; i < reg_array->num_elements; i++) { @@ -223,7 +229,10 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, return 0; } - read_val = register_read(reg, size * 8, reg_array->prefix, + /* Generate appropriate read enable mask */ + re = register_enabled_mask(reg->data_size, size); + + read_val = register_read(reg, re, reg_array->prefix, reg_array->debug); return extract64(read_val, 0, size * 8); @@ -274,9 +283,18 @@ void register_finalize_block(RegisterInfoArray *r_array) g_free(r_array); } +static void register_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + /* Reason: needs to be wired up to work */ + dc->cannot_instantiate_with_device_add_yet = true; +} + static const TypeInfo register_info = { .name = TYPE_REGISTER, .parent = TYPE_DEVICE, + .class_init = register_class_init, }; static void register_register_types(void) diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c index 7528665510..59120ddb67 100644 --- a/hw/display/milkymist-tmu2.c +++ b/hw/display/milkymist-tmu2.c @@ -293,7 +293,7 @@ static void tmu2_start(MilkymistTMU2State *s) cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len); /* Write back the OpenGL framebuffer to the QEMU framebuffer */ - fb_len = 2 * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; + fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, 1); if (fb == NULL) { glDeleteTextures(1, &texture); diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index e99d4544a2..d4de8ad9f1 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -508,7 +508,7 @@ static void gem_update_int_status(CadenceGEMState *s) if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) { /* No priority queues, just trigger the interrupt */ - DB_PRINT("asserting int.\n", i); + DB_PRINT("asserting int.\n"); qemu_set_irq(s->irq[0], 1); return; } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 2e2664f22e..7978c7d52a 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -20,6 +20,7 @@ #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" +#include "qemu/error-report.h" #define KERN_IMAGE_START 0x010000UL #define KERN_PARM_AREA 0x010480UL @@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = { DEFINE_PROP_STRING("initrd", S390IPLState, initrd), DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), + DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, true), @@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) TYPE_VIRTIO_CCW_DEVICE); SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), TYPE_SCSI_DEVICE); + VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), + TYPE_VIRTIO_NET); + + if (vn) { + ipl->netboot = true; + } if (virtio_ccw_dev) { CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev); @@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) return false; } +static int load_netboot_image(Error **errp) +{ + S390IPLState *ipl = get_ipl_device(); + char *netboot_filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = NULL; + void *ram_ptr = NULL; + int img_size = -1; + + mr = memory_region_find(sysmem, 0, 1).mr; + if (!mr) { + error_setg(errp, "Failed to find memory region at address 0"); + return -1; + } + + ram_ptr = memory_region_get_ram_ptr(mr); + if (!ram_ptr) { + error_setg(errp, "No RAM found"); + goto unref_mr; + } + + netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw); + if (netboot_filename == NULL) { + error_setg(errp, "Could not find network bootloader"); + goto unref_mr; + } + + img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr, + NULL, NULL, 1, EM_S390, 0, 0, NULL, false); + + if (img_size < 0) { + img_size = load_image_size(netboot_filename, ram_ptr, ram_size); + ipl->start_addr = KERN_IMAGE_START; + } + + if (img_size < 0) { + error_setg(errp, "Failed to load network bootloader"); + } + + g_free(netboot_filename); + +unref_mr: + memory_region_unref(mr); + return img_size; +} + +static bool is_virtio_net_device(IplParameterBlock *iplb) +{ + uint8_t cssid; + uint8_t ssid; + uint16_t devno; + uint16_t schid; + SubchDev *sch = NULL; + + if (iplb->pbt != S390_IPL_TYPE_CCW) { + return false; + } + + devno = be16_to_cpu(iplb->ccw.devno); + ssid = iplb->ccw.ssid & 3; + + for (schid = 0; schid < MAX_SCHID; schid++) { + for (cssid = 0; cssid < MAX_CSSID; cssid++) { + sch = css_find_subch(1, cssid, ssid, schid); + + if (sch && sch->devno == devno) { + return sch->id.cu_model == VIRTIO_ID_NET; + } + } + } + return false; +} + void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); ipl->iplb = *iplb; ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); } IplParameterBlock *s390_ipl_get_iplb(void) @@ -287,6 +369,7 @@ void s390_reipl_request(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); + Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) ipl->iplb_valid = s390_gen_initial_iplb(ipl); } } + if (ipl->netboot) { + if (load_netboot_image(&err) < 0) { + error_report_err(err); + vm_stop(RUN_STATE_INTERNAL_ERROR); + } + ipl->iplb.ccw.netboot_start_addr = ipl->start_addr; + } } static void s390_ipl_reset(DeviceState *dev) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index c89109585a..46930e4c64 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -16,7 +16,8 @@ #include "cpu.h" struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; @@ -100,12 +101,14 @@ struct S390IPLState { IplParameterBlock iplb; bool iplb_valid; bool reipl_requested; + bool netboot; /*< public >*/ char *kernel; char *initrd; char *cmdline; char *firmware; + char *netboot_fw; uint8_t cssid; uint8_t ssid; uint16_t devno; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 4f0d62b2d8..40914fde6f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine) /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, "s390-ccw.img", true); + machine->initrd_filename, "s390-ccw.img", + "s390-netboot.img", true); s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 9cfb09057e..afa4148e6b 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios) { Object *new = object_new(TYPE_S390_IPL); @@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename, } qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); + qdev_prop_set_string(dev, "netboot_fw", netboot_fw); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, new, NULL); diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index f588b80a6e..f2377a3e0e 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios); void s390_create_virtio_net(BusState *bus, const char *name); void s390_nmi(NMIState *n, int cpu_index, Error **errp); diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 25659b93be..a172a6068a 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as) + AddressSpace *as, bool load_rom) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd, *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } - snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); - /* rom_add_elf_program() seize the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr, as); + /* rom_add_elf_program() seize the ownership of 'data' */ + rom_add_elf_program(label, data, file_size, mem_size, addr, as); + } else { + cpu_physical_memory_write(addr, data, file_size); + g_free(data); + } total_size += mem_size; if (addr < low) diff --git a/include/hw/loader.h b/include/hw/loader.h index 40c4153e58..490c9ff8e6 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf_as: +/** load_elf_ram: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -81,6 +81,7 @@ const char *load_elf_strerror(int error); * words and 3 for within doublewords. * @as: The AddressSpace to load the ELF to. The value of address_space_memory * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -93,6 +94,16 @@ const char *load_elf_strerror(int error); * If @elf_machine is EM_NONE then the machine type will be read from the * ELF header and no checks will be carried out against the machine type. */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ int load_elf_as(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, diff --git a/include/qemu-common.h b/include/qemu-common.h index 1430390eb6..d218821c14 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -19,7 +19,7 @@ #include "qemu/option.h" /* Copyright string for -version arguments, About dialogs, etc */ -#define QEMU_COPYRIGHT "Copyright (c) 2003-2016 " \ +#define QEMU_COPYRIGHT "Copyright (c) 2003-2017 " \ "Fabrice Bellard and the QEMU Project developers" /* main function, renamed */ diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h new file mode 100644 index 0000000000..3133d1ca40 --- /dev/null +++ b/include/qemu/throttle-options.h @@ -0,0 +1,92 @@ +/* + * QEMU throttling command line options + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ +#ifndef THROTTLE_OPTIONS_H +#define THROTTLE_OPTIONS_H + +#define THROTTLE_OPTS \ + { \ + .name = "throttling.iops-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total I/O operations per second",\ + },{ \ + .name = "throttling.iops-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read operations per second",\ + },{ \ + .name = "throttling.iops-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write operations per second",\ + },{ \ + .name = "throttling.bps-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total bytes per second",\ + },{ \ + .name = "throttling.bps-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read bytes per second",\ + },{ \ + .name = "throttling.bps-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write bytes per second",\ + },{ \ + .name = "throttling.iops-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations burst",\ + },{ \ + .name = "throttling.iops-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations read burst",\ + },{ \ + .name = "throttling.iops-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations write burst",\ + },{ \ + .name = "throttling.bps-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes burst",\ + },{ \ + .name = "throttling.bps-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes read burst",\ + },{ \ + .name = "throttling.bps-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes write burst",\ + },{ \ + .name = "throttling.iops-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-size",\ + .type = QEMU_OPT_NUMBER,\ + .help = "when limiting by iops max size of an I/O in bytes",\ + } + +#endif diff --git a/net/vhost-user.c b/net/vhost-user.c index 77b8110f8c..e7e63408a1 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, qemu_chr_fe_disconnect(&s->chr); - return FALSE; + return TRUE; +} + +static void net_vhost_user_event(void *opaque, int event); + +static void chr_closed_bh(void *opaque) +{ + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER_NIC, + MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, + opaque, NULL, true); + + if (err) { + error_report_err(err); + } } static void net_vhost_user_event(void *opaque, int event) @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, - net_vhost_user_watch, s); if (vhost_user_start(queues, ncs, &s->chr) < 0) { qemu_chr_fe_disconnect(&s->chr); return; } + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, + net_vhost_user_watch, s); qmp_set_link(name, true, &err); s->started = true; break; case CHR_EVENT_CLOSED: - qmp_set_link(name, false, &err); - vhost_user_stop(queues, ncs); - g_source_remove(s->watch); - s->watch = 0; + /* a close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear to idle. + * FIXME: better handle failure in vhost code, remove bh + */ + if (s->watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->watch); + s->watch = 0; + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); + } break; } diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex cf05bf0be2..2a4adfa654 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 611102e3ef..b21c877b53 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -724,11 +724,17 @@ static void zipl_load_vscsi(void) void zipl_load(void) { - if (virtio_get_device()->is_cdrom) { + VDev *vdev = virtio_get_device(); + + if (vdev->is_cdrom) { ipl_iso_el_torito(); panic("\n! Cannot IPL this ISO image !\n"); } + if (virtio_get_device_type() == VIRTIO_ID_NET) { + jump_to_IPL_code(vdev->netboot_start_addr); + } + ipl_scsi(); switch (virtio_get_device_type()) { diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 86abc56a90..890aed9ece 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -13,7 +13,8 @@ #define IPLB_H struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 345b848752..0946766d86 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no) if (!virtio_is_supported(blk_schid)) { continue; } + /* Skip net devices since no IPLB is created and therefore no + * no network bootloader has been loaded + */ + if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { + continue; + } if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { return true; } @@ -67,6 +73,7 @@ static void virtio_setup(void) int ssid; bool found = false; uint16_t dev_no; + VDev *vdev = virtio_get_device(); /* * We unconditionally enable mss support. In every sane configuration, @@ -85,9 +92,6 @@ static void virtio_setup(void) found = find_dev(&schib, dev_no); break; case S390_IPL_TYPE_QEMU_SCSI: - { - VDev *vdev = virtio_get_device(); - vdev->scsi_device_selected = true; vdev->selected_scsi_device.channel = iplb.scsi.channel; vdev->selected_scsi_device.target = iplb.scsi.target; @@ -95,7 +99,6 @@ static void virtio_setup(void) blk_schid.ssid = iplb.scsi.ssid & 0x3; found = find_dev(&schib, iplb.scsi.devno); break; - } default: panic("List-directed IPL not supported yet!\n"); } @@ -111,9 +114,14 @@ static void virtio_setup(void) IPL_assert(found, "No virtio device found"); - virtio_setup_device(blk_schid); + if (virtio_get_device_type() == VIRTIO_ID_NET) { + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = iplb.ccw.netboot_start_addr; + } else { + virtio_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } } int main(void) diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index b333734955..6ee93d56db 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid) switch (vdev.senseid.cu_model) { case VIRTIO_ID_BLOCK: case VIRTIO_ID_SCSI: + case VIRTIO_ID_NET: return true; } } diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index eb35ea5faf..3388a423e5 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -276,6 +276,7 @@ struct VDev { uint8_t scsi_dev_heads; bool scsi_device_selected; ScsiDevice selected_scsi_device; + uint64_t netboot_start_addr; }; typedef struct VDev VDev; diff --git a/qemu-options.hx b/qemu-options.hx index bf458f83c3..c85f77d1d8 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -744,7 +744,12 @@ ETEXI DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" - " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", + " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n" + " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n" + " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n" + " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n" + " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n" + " [[,throttling.iops-size=is]]\n", QEMU_ARCH_ALL) STEXI @@ -2146,7 +2151,7 @@ Example: @example qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -numa node,memdev=mem \ - -chardev socket,path=/path/to/socket \ + -chardev socket,id=chr0,path=/path/to/socket \ -netdev type=vhost-user,id=net0,chardev=chr0 \ -device virtio-net-pci,netdev=net0 @end example diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 5b66d3325d..2a894eec65 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -671,7 +671,7 @@ static S390CPUModel *get_max_cpu_model(Error **errp) if (kvm_enabled()) { kvm_s390_get_host_cpu_model(&max_model, errp); } else { - /* TCG enulates a z900 */ + /* TCG emulates a z900 */ max_model.def = &s390_cpu_defs[0]; bitmap_copy(max_model.features, max_model.def->default_feat, S390_FEAT_MAX); diff --git a/tests/Makefile.include b/tests/Makefile.include index e60bb6ce58..3310c170a3 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -308,8 +308,7 @@ check-qtest-sparc-y = tests/prom-env-test$(EXESUF) check-qtest-sparc64-y = tests/endianness-test$(EXESUF) #check-qtest-sparc64-y += tests/m48t59-test$(EXESUF) #gcov-files-sparc64-y += hw/timer/m48t59.c -#Disabled for now, triggers a TCG bug on 32-bit hosts -#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) +check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) check-qtest-arm-y = tests/tmp105-test$(EXESUF) check-qtest-arm-y += tests/ds1338-test$(EXESUF) diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker new file mode 100644 index 0000000000..bbb21ed088 --- /dev/null +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -0,0 +1,22 @@ +# +# Docker s390 cross-compiler target +# +# This docker target is based on stretch (testing) as the stable build +# doesn't have the cross compiler available. +# +FROM debian:testing-slim + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Add the s390x architecture +RUN dpkg --add-architecture s390x + +# Grab the updated list of packages +RUN apt update +RUN apt dist-upgrade -yy +RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install +RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- diff --git a/tests/ide-test.c b/tests/ide-test.c index fb541f88b5..b57c2b1676 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -544,6 +544,7 @@ static void make_dirty(uint8_t device) guest_buf = guest_alloc(guest_malloc, len); buf = g_malloc(len); + memset(buf, rand() % 255 + 1, len); g_assert(guest_buf); g_assert(buf); diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index bd33bc353d..eac207b30e 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -76,7 +76,7 @@ static void add_tests(const char *machines[]) int main(int argc, char *argv[]) { const char *sparc_machines[] = { "SPARCbook", "Voyager", "SS-20", NULL }; - const char *sparc64_machines[] = { "sun4u", "sun4v", NULL }; + const char *sparc64_machines[] = { "sun4u", NULL }; const char *ppc_machines[] = { "mac99", "g3beige", NULL }; const char *ppc64_machines[] = { "mac99", "g3beige", "pseries", NULL }; const char *arch = qtest_get_arch(); |