diff options
author | Peter Maydell | 2021-02-04 20:48:30 +0100 |
---|---|---|
committer | Peter Maydell | 2021-02-04 20:48:30 +0100 |
commit | 2c6df987965729df702fa12f79564b5f76e3fa4e (patch) | |
tree | dc26eda04870294a9814ae4414faf5e95a6564dc | |
parent | Merge remote-tracking branch 'remotes/armbru/tags/pull-qmp-2021-02-04' into s... (diff) | |
parent | virtiofsd: Add restart_syscall to the seccomp whitelist (diff) | |
download | qemu-2c6df987965729df702fa12f79564b5f76e3fa4e.tar.gz qemu-2c6df987965729df702fa12f79564b5f76e3fa4e.tar.xz qemu-2c6df987965729df702fa12f79564b5f76e3fa4e.zip |
Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20210204' into staging
virtiofs: Security pull 2021-02-04
This contains an important CVE fix for virtiofsd,
together with two fixes for over-eager seccomp rules.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
# gpg: Signature made Thu 04 Feb 2021 18:30:37 GMT
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert-gitlab/tags/pull-virtiofs-20210204:
virtiofsd: Add restart_syscall to the seccomp whitelist
virtiofsd: Add _llseek to the seccomp whitelist
virtiofsd: prevent opening of special files (CVE-2020-35517)
virtiofsd: optionally return inode pointer from lo_do_lookup()
virtiofsd: extract lo_do_open() from lo_open()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | tools/virtiofsd/passthrough_ll.c | 224 | ||||
-rw-r--r-- | tools/virtiofsd/passthrough_seccomp.c | 2 |
2 files changed, 150 insertions, 76 deletions
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 5fb36d9407..147b59338a 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -459,17 +459,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) } /* Assumes lo->mutex is held */ -static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) +static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) { struct lo_map_elem *elem; - elem = lo_map_alloc_elem(&lo_data(req)->fd_map); + elem = lo_map_alloc_elem(&lo->fd_map); if (!elem) { return -1; } elem->fd = fd; - return elem - lo_data(req)->fd_map.elems; + return elem - lo->fd_map.elems; } /* Assumes lo->mutex is held */ @@ -555,6 +555,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) return fd; } +/* + * Open a file descriptor for an inode. Returns -EBADF if the inode is not a + * regular file or a directory. + * + * Use this helper function instead of raw openat(2) to prevent security issues + * when a malicious client opens special files such as block device nodes. + * Symlink inodes are also rejected since symlinks must already have been + * traversed on the client side. + */ +static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, + int open_flags) +{ + g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); + int fd; + + if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { + return -EBADF; + } + + /* + * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier + * that the inode is not a special file but if an external process races + * with us then symlinks are traversed here. It is not possible to escape + * the shared directory since it is mounted as "/" though. + */ + fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); + if (fd < 0) { + return -errno; + } + return fd; +} + static void lo_init(void *userdata, struct fuse_conn_info *conn) { struct lo_data *lo = (struct lo_data *)userdata; @@ -684,9 +716,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, if (fi) { truncfd = fd; } else { - sprintf(procname, "%i", ifd); - truncfd = openat(lo->proc_self_fd, procname, O_RDWR); + truncfd = lo_inode_open(lo, inode, O_RDWR); if (truncfd < 0) { + errno = -truncfd; goto out_err; } } @@ -831,11 +863,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, } /* - * Increments nlookup and caller must release refcount using - * lo_inode_put(&parent). + * Increments nlookup on the inode on success. unref_inode_lolocked() must be + * called eventually to decrement nlookup again. If inodep is non-NULL, the + * inode pointer is stored and the caller must call lo_inode_put(). */ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) + struct fuse_entry_param *e, + struct lo_inode **inodep) { int newfd; int res; @@ -845,6 +879,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, struct lo_inode *inode = NULL; struct lo_inode *dir = lo_inode(req, parent); + if (inodep) { + *inodep = NULL; /* in case there is an error */ + } + /* * name_to_handle_at() and open_by_handle_at() can reach here with fuse * mount point in guest, but we don't have its inode info in the @@ -913,7 +951,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, pthread_mutex_unlock(&lo->mutex); } e->ino = inode->fuse_ino; - lo_inode_put(lo, &inode); + + /* Transfer ownership of inode pointer to caller or drop it */ + if (inodep) { + *inodep = inode; + } else { + lo_inode_put(lo, &inode); + } + lo_inode_put(lo, &dir); fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, @@ -948,7 +993,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) return; } - err = lo_do_lookup(req, parent, name, &e); + err = lo_do_lookup(req, parent, name, &e, NULL); if (err) { fuse_reply_err(req, err); } else { @@ -1056,7 +1101,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, goto out; } - saverr = lo_do_lookup(req, parent, name, &e); + saverr = lo_do_lookup(req, parent, name, &e, NULL); if (saverr) { goto out; } @@ -1534,7 +1579,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, if (plus) { if (!is_dot_or_dotdot(name)) { - err = lo_do_lookup(req, ino, name, &e); + err = lo_do_lookup(req, ino, name, &e, NULL); if (err) { goto error; } @@ -1651,12 +1696,52 @@ static void update_open_flags(int writeback, int allow_direct_io, } } +/* + * Open a regular file, set up an fd mapping, and fill out the struct + * fuse_file_info for it. If existing_fd is not negative, use that fd instead + * opening a new one. Takes ownership of existing_fd. + * + * Returns 0 on success or a positive errno. + */ +static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + int existing_fd, struct fuse_file_info *fi) +{ + ssize_t fh; + int fd = existing_fd; + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + + if (fd < 0) { + fd = lo_inode_open(lo, inode, fi->flags); + if (fd < 0) { + return -fd; + } + } + + pthread_mutex_lock(&lo->mutex); + fh = lo_add_fd_mapping(lo, fd); + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); + return ENOMEM; + } + + fi->fh = fh; + if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } + return 0; +} + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info *fi) { - int fd; + int fd = -1; struct lo_data *lo = lo_data(req); struct lo_inode *parent_inode; + struct lo_inode *inode = NULL; struct fuse_entry_param e; int err; struct lo_cred old = {}; @@ -1682,36 +1767,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, update_open_flags(lo->writeback, lo->allow_direct_io, fi); - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); + /* Try to create a new file but don't open existing files */ + fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); err = fd == -1 ? errno : 0; - lo_restore_cred(&old); - if (!err) { - ssize_t fh; + lo_restore_cred(&old); - pthread_mutex_lock(&lo->mutex); - fh = lo_add_fd_mapping(req, fd); - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); - err = ENOMEM; - goto out; - } + /* Ignore the error if file exists and O_EXCL was not given */ + if (err && (err != EEXIST || (fi->flags & O_EXCL))) { + goto out; + } - fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); + err = lo_do_lookup(req, parent, name, &e, &inode); + if (err) { + goto out; } - if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; + + err = lo_do_open(lo, inode, fd, fi); + fd = -1; /* lo_do_open() takes ownership of fd */ + if (err) { + /* Undo lo_do_lookup() nlookup ref */ + unref_inode_lolocked(lo, inode, 1); } out: + lo_inode_put(lo, &inode); lo_inode_put(lo, &parent_inode); if (err) { + if (fd >= 0) { + close(fd); + } + fuse_reply_err(req, err); } else { fuse_reply_create(req, &e, fi); @@ -1725,7 +1812,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, pid_t pid, int *err) { struct lo_inode_plock *plock; - char procname[64]; int fd; plock = @@ -1742,12 +1828,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, } /* Open another instance of file which can be used for ofd locks. */ - sprintf(procname, "%i", inode->fd); - /* TODO: What if file is not writable? */ - fd = openat(lo->proc_self_fd, procname, O_RDWR); - if (fd == -1) { - *err = errno; + fd = lo_inode_open(lo, inode, O_RDWR); + if (fd < 0) { + *err = -fd; free(plock); return NULL; } @@ -1892,38 +1976,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { - int fd; - ssize_t fh; - char buf[64]; struct lo_data *lo = lo_data(req); + struct lo_inode *inode = lo_inode(req, ino); + int err; fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, fi->flags); - update_open_flags(lo->writeback, lo->allow_direct_io, fi); - - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); - } - - pthread_mutex_lock(&lo->mutex); - fh = lo_add_fd_mapping(req, fd); - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); - fuse_reply_err(req, ENOMEM); + if (!inode) { + fuse_reply_err(req, EBADF); return; } - fi->fh = fh; - if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; + err = lo_do_open(lo, inode, -1, fi); + lo_inode_put(lo, &inode); + if (err) { + fuse_reply_err(req, err); + } else { + fuse_reply_open(req, fi); } - fuse_reply_open(req, fi); } static void lo_release(fuse_req_t req, fuse_ino_t ino, @@ -1982,39 +2053,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi) { + struct lo_inode *inode = lo_inode(req, ino); + struct lo_data *lo = lo_data(req); int res; int fd; - char *buf; fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, (void *)fi); - if (!fi) { - struct lo_data *lo = lo_data(req); - - res = asprintf(&buf, "%i", lo_fd(req, ino)); - if (res == -1) { - return (void)fuse_reply_err(req, errno); - } + if (!inode) { + fuse_reply_err(req, EBADF); + return; + } - fd = openat(lo->proc_self_fd, buf, O_RDWR); - free(buf); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); + if (!fi) { + fd = lo_inode_open(lo, inode, O_RDWR); + if (fd < 0) { + res = -fd; + goto out; } } else { fd = lo_fi_fd(req, fi); } if (datasync) { - res = fdatasync(fd); + res = fdatasync(fd) == -1 ? errno : 0; } else { - res = fsync(fd); + res = fsync(fd) == -1 ? errno : 0; } if (!fi) { close(fd); } - fuse_reply_err(req, res == -1 ? errno : 0); +out: + lo_inode_put(lo, &inode); + fuse_reply_err(req, res); } static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index a60d7da4b4..ea852e2e33 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -65,6 +65,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(linkat), SCMP_SYS(listxattr), SCMP_SYS(lseek), + SCMP_SYS(_llseek), /* For POWER */ SCMP_SYS(madvise), SCMP_SYS(mkdirat), SCMP_SYS(mknodat), @@ -88,6 +89,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(renameat), SCMP_SYS(renameat2), SCMP_SYS(removexattr), + SCMP_SYS(restart_syscall), SCMP_SYS(rt_sigaction), SCMP_SYS(rt_sigprocmask), SCMP_SYS(rt_sigreturn), |