From cece116c939d219070b250338439c2d16f94e3da Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Thu, 8 Oct 2020 19:31:33 +0900 Subject: configure: add option for virtiofsd Currently it is unknown whether virtiofsd will be built at configuration time. It will be automatically built when dependency is met. Also, required libraries are not clear. To make this clear, add configure option --{enable,disable}-virtiofsd. The default is the same as current (enabled if available) like many other options. When --enable-virtiofsd is given and dependency is not met, we get: ERROR: Problem encountered: virtiofsd requires libcap-ng-devel and seccomp-devel or ERROR: Problem encountered: virtiofsd needs tools and vhost-user support In addition, configuration summary now includes virtiofsd entry: build virtiofs daemon: YES/NO Signed-off-by: Misono Tomohiro Message-Id: <20201008103133.2722903-1-misono.tomohiro@jp.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert Manual merge --- configure | 8 +++++++- meson.build | 1 + meson_options.txt | 2 ++ tools/meson.build | 17 +++++++++++++++-- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 55e07c82dd..83610b0db8 100755 --- a/configure +++ b/configure @@ -302,6 +302,7 @@ fdt="auto" netmap="no" sdl="auto" sdl_image="auto" +virtiofsd="auto" virtfs="" libudev="auto" mpath="auto" @@ -999,6 +1000,10 @@ for opt do ;; --enable-libudev) libudev="enabled" ;; + --disable-virtiofsd) virtiofsd="disabled" + ;; + --enable-virtiofsd) virtiofsd="enabled" + ;; --disable-mpath) mpath="disabled" ;; --enable-mpath) mpath="enabled" @@ -1758,6 +1763,7 @@ disabled with --disable-FEATURE, default is enabled if available: vnc-png PNG compression for VNC server cocoa Cocoa UI (Mac OS X only) virtfs VirtFS + virtiofsd build virtiofs daemon (virtiofsd) libudev Use libudev to enumerate host devices mpath Multipath persistent reservation passthrough xen xen backend driver support @@ -6972,7 +6978,7 @@ NINJA=$ninja $meson setup \ -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \ -Dcocoa=$cocoa -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \ -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \ - -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f \ + -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \ -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \ -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\ -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \ diff --git a/meson.build b/meson.build index 835424999d..47e32e1fcb 100644 --- a/meson.build +++ b/meson.build @@ -2045,6 +2045,7 @@ summary_info += {'Audio drivers': config_host['CONFIG_AUDIO_DRIVERS']} summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']} summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']} summary_info += {'VirtFS support': config_host.has_key('CONFIG_VIRTFS')} +summary_info += {'build virtiofs daemon': have_virtiofsd} summary_info += {'Multipath support': mpathpersist.found()} summary_info += {'VNC support': vnc.found()} if vnc.found() diff --git a/meson_options.txt b/meson_options.txt index 48ab4ce7d0..b4f1801875 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -62,6 +62,8 @@ option('vnc_sasl', type : 'feature', value : 'auto', description: 'SASL authentication for VNC server') option('xkbcommon', type : 'feature', value : 'auto', description: 'xkbcommon support') +option('virtiofsd', type: 'feature', value: 'auto', + description: 'build virtiofs daemon (virtiofsd)') option('capstone', type: 'combo', value: 'auto', choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], diff --git a/tools/meson.build b/tools/meson.build index 513bd2ff4f..76bf84df52 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -1,10 +1,23 @@ -have_virtiofsd = (have_system and +have_virtiofsd = (targetos == 'linux' and have_tools and - 'CONFIG_LINUX' in config_host and 'CONFIG_SECCOMP' in config_host and 'CONFIG_LIBCAP_NG' in config_host and 'CONFIG_VHOST_USER' in config_host) +if get_option('virtiofsd').enabled() + if not have_virtiofsd + if targetos != 'linux' + error('virtiofsd requires Linux') + elif 'CONFIG_SECCOMP' not in config_host or 'CONFIG_LIBCAP_NG' not in config_host + error('virtiofsd requires libcap-ng-devel and seccomp-devel') + elif not have_tools or 'CONFIG_VHOST_USER' not in config_host + error('virtiofsd needs tools and vhost-user support') + endif + endif +elif get_option('virtiofsd').disabled() or not have_system + have_virtiofsd = false +endif + if have_virtiofsd subdir('virtiofsd') endif -- cgit v1.2.3-55-g7522 From 800ad114f10d0bf94e49b1441e1a13064a45a967 Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Thu, 8 Oct 2020 20:01:48 +0900 Subject: virtiofsd: passthrough_ll: set FUSE_LOG_INFO as default log_level Just noticed that although help message says default log level is INFO, it is actually 0 (EMRGE) and no mesage will be shown when error occurs. It's better to follow help message. Signed-off-by: Misono Tomohiro Message-Id: <20201008110148.2757734-1-misono.tomohiro@jp.fujitsu.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index ff53df4451..13fdb12367 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -2878,12 +2878,11 @@ int main(int argc, char *argv[]) goto err_out1; } - /* - * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, - * and we don't use this log level). - */ if (opts.log_level != 0) { current_log_level = opts.log_level; + } else { + /* default log level is INFO */ + current_log_level = FUSE_LOG_INFO; } lo.debug = opts.debug; if (lo.debug) { -- cgit v1.2.3-55-g7522 From 06844584b62a43384642f7243b0fc01c9fff0fc7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 8 Oct 2020 09:55:34 +0100 Subject: virtiofsd: add container-friendly -o sandbox=chroot option virtiofsd cannot run in a container because CAP_SYS_ADMIN is required to create namespaces. Introduce a weaker sandbox mode that is sufficient in container environments because the container runtime already sets up namespaces. Use chroot to restrict path traversal to the shared directory. virtiofsd loses the following: 1. Mount namespace. The process chroots to the shared directory but leaves the mounts in place. Seccomp rejects mount(2)/umount(2) syscalls. 2. Pid namespace. This should be fine because virtiofsd is the only process running in the container. 3. Network namespace. This should be fine because seccomp already rejects the connect(2) syscall, but an additional layer of security is lost. Container runtime-specific network security policies can be used drop network traffic (except for the vhost-user UNIX domain socket). Signed-off-by: Stefan Hajnoczi Message-Id: <20201008085534.16070-1-stefanha@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- docs/tools/virtiofsd.rst | 32 +++++++++++++++++----- tools/virtiofsd/helper.c | 8 ++++++ tools/virtiofsd/passthrough_ll.c | 57 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 9 deletions(-) diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 7ecee49834..65f8e76569 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -17,13 +17,24 @@ This program is designed to work with QEMU's ``--device vhost-user-fs-pci`` but should work with any virtual machine monitor (VMM) that supports vhost-user. See the Examples section below. -This program must be run as the root user. Upon startup the program will -switch into a new file system namespace with the shared directory tree as its -root. This prevents "file system escapes" due to symlinks and other file -system objects that might lead to files outside the shared directory. The -program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other -vectors that could allow an attacker to compromise the system after gaining -control of the virtiofsd process. +This program must be run as the root user. The program drops privileges where +possible during startup although it must be able to create and access files +with any uid/gid: + +* The ability to invoke syscalls is limited using seccomp(2). +* Linux capabilities(7) are dropped. + +In "namespace" sandbox mode the program switches into a new file system +namespace and invokes pivot_root(2) to make the shared directory tree its root. +A new pid and net namespace is also created to isolate the process. + +In "chroot" sandbox mode the program invokes chroot(2) to make the shared +directory tree its root. This mode is intended for container environments where +the container runtime has already set up the namespaces and the program does +not have permission to create namespaces itself. + +Both sandbox modes prevent "file system escapes" due to symlinks and other file +system objects that might lead to files outside the shared directory. Options ------- @@ -69,6 +80,13 @@ Options * readdirplus|no_readdirplus - Enable/disable readdirplus. The default is ``readdirplus``. + * sandbox=namespace|chroot - + Sandbox mode: + - namespace: Create mount, pid, and net namespaces and pivot_root(2) into + the shared directory. + - chroot: chroot(2) into shared directory (use in containers). + The default is "namespace". + * source=PATH - Share host directory tree located at PATH. This option is required. diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index 85770d63f1..2e181a49b5 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -166,6 +166,14 @@ void fuse_cmdline_help(void) " enable/disable readirplus\n" " default: readdirplus except with " "cache=none\n" + " -o sandbox=namespace|chroot\n" + " sandboxing mode:\n" + " - namespace: mount, pid, and net\n" + " namespaces with pivot_root(2)\n" + " into shared directory\n" + " - chroot: chroot(2) into shared\n" + " directory (use in containers)\n" + " default: namespace\n" " -o timeout= I/O timeout (seconds)\n" " default: depends on cache= option.\n" " -o writeback|no_writeback enable/disable writeback cache\n" diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 13fdb12367..f03b1f9a69 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -137,8 +137,14 @@ enum { CACHE_ALWAYS, }; +enum { + SANDBOX_NAMESPACE, + SANDBOX_CHROOT, +}; + struct lo_data { pthread_mutex_t mutex; + int sandbox; int debug; int writeback; int flock; @@ -163,6 +169,12 @@ struct lo_data { }; static const struct fuse_opt lo_opts[] = { + { "sandbox=namespace", + offsetof(struct lo_data, sandbox), + SANDBOX_NAMESPACE }, + { "sandbox=chroot", + offsetof(struct lo_data, sandbox), + SANDBOX_CHROOT }, { "writeback", offsetof(struct lo_data, writeback), 1 }, { "no_writeback", offsetof(struct lo_data, writeback), 0 }, { "source=%s", offsetof(struct lo_data, source), 0 }, @@ -2660,6 +2672,41 @@ static void setup_capabilities(char *modcaps_in) pthread_mutex_unlock(&cap.mutex); } +/* + * Use chroot as a weaker sandbox for environments where the process is + * launched without CAP_SYS_ADMIN. + */ +static void setup_chroot(struct lo_data *lo) +{ + lo->proc_self_fd = open("/proc/self/fd", O_PATH); + if (lo->proc_self_fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n"); + exit(1); + } + + /* + * Make the shared directory the file system root so that FUSE_OPEN + * (lo_open()) cannot escape the shared directory by opening a symlink. + * + * The chroot(2) syscall is later disabled by seccomp and the + * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot + * is not possible. + * + * However, it's still possible to escape the chroot via lo->proc_self_fd + * but that requires first gaining control of the process. + */ + if (chroot(lo->source) != 0) { + fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source); + exit(1); + } + + /* Move into the chroot */ + if (chdir("/") != 0) { + fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n"); + exit(1); + } +} + /* * Lock down this process to prevent access to other processes or files outside * source directory. This reduces the impact of arbitrary code execution bugs. @@ -2667,8 +2714,13 @@ static void setup_capabilities(char *modcaps_in) static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, bool enable_syslog) { - setup_namespaces(lo, se); - setup_mounts(lo->source); + if (lo->sandbox == SANDBOX_NAMESPACE) { + setup_namespaces(lo, se); + setup_mounts(lo->source); + } else { + setup_chroot(lo); + } + setup_seccomp(enable_syslog); setup_capabilities(g_strdup(lo->modcaps)); } @@ -2815,6 +2867,7 @@ int main(int argc, char *argv[]) struct fuse_session *se; struct fuse_cmdline_opts opts; struct lo_data lo = { + .sandbox = SANDBOX_NAMESPACE, .debug = 0, .writeback = 0, .posix_lock = 0, -- cgit v1.2.3-55-g7522 From 6084633dff3a05d63176e06d7012c7e15aba15be Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Fri, 23 Oct 2020 17:58:08 +0100 Subject: tools/virtiofsd: xattr name mappings: Add option Add an option to define mappings of xattr names so that the client and server filesystems see different views. This can be used to have different SELinux mappings as seen by the guest, to run the virtiofsd with less privileges (e.g. in a case where it can't set trusted/system/security xattrs but you want the guest to be able to), or to isolate multiple users of the same name; e.g. trusted attributes used by stacking overlayfs. A mapping engine is used with 3 simple rules; the rules can be combined to allow most useful mapping scenarios. The ruleset is defined by -o xattrmap='rules...'. This patch doesn't use the rule maps yet. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20201023165812.36028-2-dgilbert@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- docs/tools/virtiofsd.rst | 92 +++++++++++++++++++++ tools/virtiofsd/passthrough_ll.c | 173 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 265 insertions(+) diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 65f8e76569..67c16f9df0 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -127,6 +127,98 @@ Options timeout. ``always`` sets a long cache lifetime at the expense of coherency. The default is ``auto``. +xattr-mapping +------------- + +By default the name of xattr's used by the client are passed through to the server +file system. This can be a problem where either those xattr names are used +by something on the server (e.g. selinux client/server confusion) or if the +virtiofsd is running in a container with restricted privileges where it cannot +access some attributes. + +A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping`` +string consists of a series of rules. + +The first matching rule terminates the mapping. +The set of rules must include a terminating rule to match any remaining attributes +at the end. + +Each rule consists of a number of fields separated with a separator that is the +first non-white space character in the rule. This separator must then be used +for the whole rule. +White space may be added before and after each rule. +Using ':' as the separator a rule is of the form: + +``:type:scope:key:prepend:`` + +**scope** is: + +- 'client' - match 'key' against a xattr name from the client for + setxattr/getxattr/removexattr +- 'server' - match 'prepend' against a xattr name from the server + for listxattr +- 'all' - can be used to make a single rule where both the server + and client matches are triggered. + +**type** is one of: + +- 'prefix' - is designed to prepend and strip a prefix; the modified + attributes then being passed on to the client/server. + +- 'ok' - Causes the rule set to be terminated when a match is found + while allowing matching xattr's through unchanged. + It is intended both as a way of explicitly terminating + the list of rules, and to allow some xattr's to skip following rules. + +- 'bad' - If a client tries to use a name matching 'key' it's + denied using EPERM; when the server passes an attribute + name matching 'prepend' it's hidden. In many ways it's use is very like + 'ok' as either an explict terminator or for special handling of certain + patterns. + +**key** is a string tested as a prefix on an attribute name originating +on the client. It maybe empty in which case a 'client' rule +will always match on client names. + +**prepend** is a string tested as a prefix on an attribute name originating +on the server, and used as a new prefix. It may be empty +in which case a 'server' rule will always match on all names from +the server. + +e.g.: + + ``:prefix:client:trusted.:user.virtiofs.:`` + + will match 'trusted.' attributes in client calls and prefix them before + passing them to the server. + + ``:prefix:server::user.virtiofs.:`` + + will strip 'user.virtiofs.' from all server replies. + + ``:prefix:all:trusted.:user.virtiofs.:`` + + combines the previous two cases into a single rule. + + ``:ok:client:user.::`` + + will allow get/set xattr for 'user.' xattr's and ignore + following rules. + + ``:ok:server::security.:`` + + will pass 'securty.' xattr's in listxattr from the server + and ignore following rules. + + ``:ok:all:::`` + + will terminate the rule search passing any remaining attributes + in both directions. + + ``:bad:server::security.:`` + + would hide 'security.' xattr's in listxattr from the server. + Examples -------- diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index f03b1f9a69..cb24af27f5 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -64,6 +64,7 @@ #include #include +#include "qemu/cutils.h" #include "passthrough_helpers.h" #include "passthrough_seccomp.h" @@ -142,6 +143,12 @@ enum { SANDBOX_CHROOT, }; +typedef struct xattr_map_entry { + char *key; + char *prepend; + unsigned int flags; +} XattrMapEntry; + struct lo_data { pthread_mutex_t mutex; int sandbox; @@ -150,6 +157,7 @@ struct lo_data { int flock; int posix_lock; int xattr; + char *xattrmap; char *source; char *modcaps; double timeout; @@ -163,6 +171,8 @@ struct lo_data { struct lo_map ino_map; /* protected by lo->mutex */ struct lo_map dirp_map; /* protected by lo->mutex */ struct lo_map fd_map; /* protected by lo->mutex */ + XattrMapEntry *xattr_map_list; + size_t xattr_map_nentries; /* An O_PATH file descriptor to /proc/self/fd/ */ int proc_self_fd; @@ -184,6 +194,7 @@ static const struct fuse_opt lo_opts[] = { { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, { "xattr", offsetof(struct lo_data, xattr), 1 }, { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 }, { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 }, { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, @@ -2022,6 +2033,161 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, fuse_reply_err(req, res == -1 ? errno : 0); } +/* types */ +/* + * Exit; process attribute unmodified if matched. + * An empty key applies to all. + */ +#define XATTR_MAP_FLAG_OK (1 << 0) +/* + * The attribute is unwanted; + * EPERM on write, hidden on read. + */ +#define XATTR_MAP_FLAG_BAD (1 << 1) +/* + * For attr that start with 'key' prepend 'prepend' + * 'key' may be empty to prepend for all attrs + * key is defined from set/remove point of view. + * Automatically reversed on read + */ +#define XATTR_MAP_FLAG_PREFIX (1 << 2) + +/* scopes */ +/* Apply rule to get/set/remove */ +#define XATTR_MAP_FLAG_CLIENT (1 << 16) +/* Apply rule to list */ +#define XATTR_MAP_FLAG_SERVER (1 << 17) +/* Apply rule to all */ +#define XATTR_MAP_FLAG_ALL (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT) + +static void add_xattrmap_entry(struct lo_data *lo, + const XattrMapEntry *new_entry) +{ + XattrMapEntry *res = g_realloc_n(lo->xattr_map_list, + lo->xattr_map_nentries + 1, + sizeof(XattrMapEntry)); + res[lo->xattr_map_nentries++] = *new_entry; + + lo->xattr_map_list = res; +} + +static void free_xattrmap(struct lo_data *lo) +{ + XattrMapEntry *map = lo->xattr_map_list; + size_t i; + + if (!map) { + return; + } + + for (i = 0; i < lo->xattr_map_nentries; i++) { + g_free(map[i].key); + g_free(map[i].prepend); + }; + + g_free(map); + lo->xattr_map_list = NULL; + lo->xattr_map_nentries = -1; +} + +static void parse_xattrmap(struct lo_data *lo) +{ + const char *map = lo->xattrmap; + const char *tmp; + + lo->xattr_map_nentries = 0; + while (*map) { + XattrMapEntry tmp_entry; + char sep; + + if (isspace(*map)) { + map++; + continue; + } + /* The separator is the first non-space of the rule */ + sep = *map++; + if (!sep) { + break; + } + + tmp_entry.flags = 0; + /* Start of 'type' */ + if (strstart(map, "prefix", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX; + } else if (strstart(map, "ok", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_OK; + } else if (strstart(map, "bad", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_BAD; + } else { + fuse_log(FUSE_LOG_ERR, + "%s: Unexpected type;" + "Expecting 'prefix', 'ok', or 'bad' in rule %zu\n", + __func__, lo->xattr_map_nentries); + exit(1); + } + + if (*map++ != sep) { + fuse_log(FUSE_LOG_ERR, + "%s: Missing '%c' at end of type field of rule %zu\n", + __func__, sep, lo->xattr_map_nentries); + exit(1); + } + + /* Start of 'scope' */ + if (strstart(map, "client", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT; + } else if (strstart(map, "server", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_SERVER; + } else if (strstart(map, "all", &map)) { + tmp_entry.flags |= XATTR_MAP_FLAG_ALL; + } else { + fuse_log(FUSE_LOG_ERR, + "%s: Unexpected scope;" + " Expecting 'client', 'server', or 'all', in rule %zu\n", + __func__, lo->xattr_map_nentries); + exit(1); + } + + if (*map++ != sep) { + fuse_log(FUSE_LOG_ERR, + "%s: Expecting '%c' found '%c'" + " after scope in rule %zu\n", + __func__, sep, *map, lo->xattr_map_nentries); + exit(1); + } + + /* At start of 'key' field */ + tmp = strchr(map, sep); + if (!tmp) { + fuse_log(FUSE_LOG_ERR, + "%s: Missing '%c' at end of key field of rule %zu", + __func__, sep, lo->xattr_map_nentries); + exit(1); + } + tmp_entry.key = g_strndup(map, tmp - map); + map = tmp + 1; + + /* At start of 'prepend' field */ + tmp = strchr(map, sep); + if (!tmp) { + fuse_log(FUSE_LOG_ERR, + "%s: Missing '%c' at end of prepend field of rule %zu", + __func__, sep, lo->xattr_map_nentries); + exit(1); + } + tmp_entry.prepend = g_strndup(map, tmp - map); + map = tmp + 1; + + add_xattrmap_entry(lo, &tmp_entry); + /* End of rule - go around again for another rule */ + } + + if (!lo->xattr_map_nentries) { + fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); + exit(1); + } +} + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, size_t size) { @@ -2858,6 +3024,8 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) close(lo->root.fd); } + free(lo->xattrmap); + free_xattrmap(lo); free(lo->source); } @@ -2958,6 +3126,11 @@ int main(int argc, char *argv[]) } else { lo.source = strdup("/"); } + + if (lo.xattrmap) { + parse_xattrmap(&lo); + } + if (!lo.timeout_set) { switch (lo.cache) { case CACHE_NONE: -- cgit v1.2.3-55-g7522 From 4f088dbf98aecde6eed94d630964cc36cd39837b Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Fri, 23 Oct 2020 17:58:09 +0100 Subject: tools/virtiofsd: xattr name mappings: Map client xattr names Map xattr names originating at the client; from get/set/remove xattr. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20201023165812.36028-3-dgilbert@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 101 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 3 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index cb24af27f5..cf67737751 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -2188,20 +2188,80 @@ static void parse_xattrmap(struct lo_data *lo) } } -static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +/* + * For use with getxattr/setxattr/removexattr, where the client + * gives us a name and we may need to choose a different one. + * Allocates a buffer for the result placing it in *out_name. + * If there's no change then *out_name is not set. + * Returns 0 on success + * Can return -EPERM to indicate we block a given attribute + * (in which case out_name is not allocated) + * Can return -ENOMEM to indicate out_name couldn't be allocated. + */ +static int xattr_map_client(const struct lo_data *lo, const char *client_name, + char **out_name) +{ + size_t i; + for (i = 0; i < lo->xattr_map_nentries; i++) { + const XattrMapEntry *cur_entry = lo->xattr_map_list + i; + + if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) && + (strstart(client_name, cur_entry->key, NULL))) { + if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { + return -EPERM; + } + if (cur_entry->flags & XATTR_MAP_FLAG_OK) { + /* Unmodified name */ + return 0; + } + if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { + *out_name = g_try_malloc(strlen(client_name) + + strlen(cur_entry->prepend) + 1); + if (!*out_name) { + return -ENOMEM; + } + sprintf(*out_name, "%s%s", cur_entry->prepend, client_name); + return 0; + } + } + } + + return -EPERM; +} + +static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, size_t size) { struct lo_data *lo = lo_data(req); char *value = NULL; char procname[64]; + const char *name; + char *mapped_name; struct lo_inode *inode; ssize_t ret; int saverr; int fd = -1; + mapped_name = NULL; + name = in_name; + if (lo->xattrmap) { + ret = xattr_map_client(lo, in_name, &mapped_name); + if (ret < 0) { + if (ret == -EPERM) { + ret = -ENODATA; + } + fuse_reply_err(req, -ret); + return; + } + if (mapped_name) { + name = mapped_name; + } + } + inode = lo_inode(req, ino); if (!inode) { fuse_reply_err(req, EBADF); + g_free(mapped_name); return; } @@ -2266,6 +2326,7 @@ out_err: saverr = errno; out: fuse_reply_err(req, saverr); + g_free(mapped_name); goto out_free; } @@ -2343,19 +2404,35 @@ out: goto out_free; } -static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, const char *value, size_t size, int flags) { char procname[64]; + const char *name; + char *mapped_name; struct lo_data *lo = lo_data(req); struct lo_inode *inode; ssize_t ret; int saverr; int fd = -1; + mapped_name = NULL; + name = in_name; + if (lo->xattrmap) { + ret = xattr_map_client(lo, in_name, &mapped_name); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + if (mapped_name) { + name = mapped_name; + } + } + inode = lo_inode(req, ino); if (!inode) { fuse_reply_err(req, EBADF); + g_free(mapped_name); return; } @@ -2390,21 +2467,38 @@ out: } lo_inode_put(lo, &inode); + g_free(mapped_name); fuse_reply_err(req, saverr); } -static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) +static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) { char procname[64]; + const char *name; + char *mapped_name; struct lo_data *lo = lo_data(req); struct lo_inode *inode; ssize_t ret; int saverr; int fd = -1; + mapped_name = NULL; + name = in_name; + if (lo->xattrmap) { + ret = xattr_map_client(lo, in_name, &mapped_name); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + if (mapped_name) { + name = mapped_name; + } + } + inode = lo_inode(req, ino); if (!inode) { fuse_reply_err(req, EBADF); + g_free(mapped_name); return; } @@ -2439,6 +2533,7 @@ out: } lo_inode_put(lo, &inode); + g_free(mapped_name); fuse_reply_err(req, saverr); } -- cgit v1.2.3-55-g7522 From 6409cf19ca17ab2acec6f014290f24e137198751 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Fri, 23 Oct 2020 17:58:10 +0100 Subject: tools/virtiofsd: xattr name mappings: Map server xattr names Map xattr names coming from the server, i.e. the host filesystem; currently this is only from listxattr. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20201023165812.36028-4-dgilbert@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index cf67737751..8c0187498a 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -2229,6 +2229,44 @@ static int xattr_map_client(const struct lo_data *lo, const char *client_name, return -EPERM; } +/* + * For use with listxattr where the server fs gives us a name and we may need + * to sanitize this for the client. + * Returns a pointer to the result in *out_name + * This is always the original string or the current string with some prefix + * removed; no reallocation is done. + * Returns 0 on success + * Can return -ENODATA to indicate the name should be dropped from the list. + */ +static int xattr_map_server(const struct lo_data *lo, const char *server_name, + const char **out_name) +{ + size_t i; + const char *end; + + for (i = 0; i < lo->xattr_map_nentries; i++) { + const XattrMapEntry *cur_entry = lo->xattr_map_list + i; + + if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) && + (strstart(server_name, cur_entry->prepend, &end))) { + if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { + return -ENODATA; + } + if (cur_entry->flags & XATTR_MAP_FLAG_OK) { + *out_name = server_name; + return 0; + } + if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { + /* Remove prefix */ + *out_name = end; + return 0; + } + } + } + + return -ENODATA; +} + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, size_t size) { @@ -2383,8 +2421,60 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) if (ret == 0) { goto out; } + + if (lo->xattr_map_list) { + /* + * Map the names back, some attributes might be dropped, + * some shortened, but not increased, so we shouldn't + * run out of room. + */ + size_t out_index, in_index; + out_index = 0; + in_index = 0; + while (in_index < ret) { + const char *map_out; + char *in_ptr = value + in_index; + /* Length of current attribute name */ + size_t in_len = strlen(value + in_index) + 1; + + int mapret = xattr_map_server(lo, in_ptr, &map_out); + if (mapret != -ENODATA && mapret != 0) { + /* Shouldn't happen */ + saverr = -mapret; + goto out; + } + if (mapret == 0) { + /* Either unchanged, or truncated */ + size_t out_len; + if (map_out != in_ptr) { + /* +1 copies the NIL */ + out_len = strlen(map_out) + 1; + } else { + /* No change */ + out_len = in_len; + } + /* + * Move result along, may still be needed for an unchanged + * entry if a previous entry was changed. + */ + memmove(value + out_index, map_out, out_len); + + out_index += out_len; + } + in_index += in_len; + } + ret = out_index; + if (ret == 0) { + goto out; + } + } fuse_reply_buf(req, value, ret); } else { + /* + * xattrmap only ever shortens the result, + * so we don't need to do anything clever with the + * allocation length here. + */ fuse_reply_xattr(req, ret); } out_free: -- cgit v1.2.3-55-g7522 From 491bfaea3bd44b47c62f758efffb67a392ded02b Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Fri, 23 Oct 2020 17:58:11 +0100 Subject: tools/virtiofsd: xattr name mapping examples Add a few examples of xattrmaps to the documentation. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Stefan Hajnoczi Message-Id: <20201023165812.36028-5-dgilbert@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- docs/tools/virtiofsd.rst | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 67c16f9df0..d80c078d80 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -219,6 +219,56 @@ e.g.: would hide 'security.' xattr's in listxattr from the server. +xattr-mapping Examples +---------------------- + +1) Prefix all attributes with 'user.virtiofs.' + +:: + +-o xattrmap=":prefix:all::user.virtiofs.::bad:all:::" + + +This uses two rules, using : as the field separator; +the first rule prefixes and strips 'user.virtiofs.', +the second rule hides any non-prefixed attributes that +the host set. + +2) Prefix 'trusted.' attributes, allow others through + +:: + + "/prefix/all/trusted./user.virtiofs./ + /bad/server//trusted./ + /bad/client/user.virtiofs.// + /ok/all///" + + +Here there are four rules, using / as the field +separator, and also demonstrating that new lines can +be included between rules. +The first rule is the prefixing of 'trusted.' and +stripping of 'user.virtiofs.'. +The second rule hides unprefixed 'trusted.' attributes +on the host. +The third rule stops a guest from explicitly setting +the 'user.virtiofs.' path directly. +Finally, the fourth rule lets all remaining attributes +through. + +3) Hide 'security.' attributes, and allow everything else + +:: + + "/bad/all/security./security./ + /ok/all///' + +The first rule combines what could be separate client and server +rules into a single 'all' rule, matching 'security.' in either +client arguments or lists returned from the host. This stops +the client seeing any 'security.' attributes on the server and +stops it setting any. + Examples -------- -- cgit v1.2.3-55-g7522 From 1d84a0213a2ec7094abe6d896ce0ec0b5aa0cacf Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Fri, 23 Oct 2020 17:58:12 +0100 Subject: tools/virtiofsd: xattr name mappings: Simple 'map' The mapping rule system implemented in the last few patches is extremely flexible, but not easy to use. Add a simple 'map' type as a sprinkling of sugar to make it easy. e.g. -o xattrmap=":map::user.virtiofs.:" would be sufficient to prefix all xattr's or -o xattrmap=":map:trusted.:user.virtiofs.:" would just prefix 'trusted.' xattr's and leave everything else alone. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20201023165812.36028-6-dgilbert@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- docs/tools/virtiofsd.rst | 19 +++++++ tools/virtiofsd/passthrough_ll.c | 112 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 1 deletion(-) diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index d80c078d80..34a9e40146 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -147,6 +147,7 @@ Each rule consists of a number of fields separated with a separator that is the first non-white space character in the rule. This separator must then be used for the whole rule. White space may be added before and after each rule. + Using ':' as the separator a rule is of the form: ``:type:scope:key:prepend:`` @@ -219,6 +220,14 @@ e.g.: would hide 'security.' xattr's in listxattr from the server. +A simpler 'map' type provides a shorter syntax for the common case: + +``:map:key:prepend:`` + +The 'map' type adds a number of separate rules to add **prepend** as a prefix +to the matched **key** (or all attributes if **key** is empty). +There may be at most one 'map' rule and it must be the last rule in the set. + xattr-mapping Examples ---------------------- @@ -234,6 +243,11 @@ the first rule prefixes and strips 'user.virtiofs.', the second rule hides any non-prefixed attributes that the host set. +This is equivalent to the 'map' rule: + +:: +-o xattrmap=":map::user.virtiofs.:" + 2) Prefix 'trusted.' attributes, allow others through :: @@ -256,6 +270,11 @@ the 'user.virtiofs.' path directly. Finally, the fourth rule lets all remaining attributes through. +This is equivalent to the 'map' rule: + +:: +-o xattrmap="/map/trusted./user.virtiofs./" + 3) Hide 'security.' attributes, and allow everything else :: diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 8c0187498a..a0beb986f3 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -2090,6 +2090,109 @@ static void free_xattrmap(struct lo_data *lo) lo->xattr_map_nentries = -1; } +/* + * Handle the 'map' type, which is sugar for a set of commands + * for the common case of prefixing a subset or everything, + * and allowing anything not prefixed through. + * It must be the last entry in the stream, although there + * can be other entries before it. + * The form is: + * :map:key:prefix: + * + * key maybe empty in which case all entries are prefixed. + */ +static void parse_xattrmap_map(struct lo_data *lo, + const char *rule, char sep) +{ + const char *tmp; + char *key; + char *prefix; + XattrMapEntry tmp_entry; + + if (*rule != sep) { + fuse_log(FUSE_LOG_ERR, + "%s: Expecting '%c' after 'map' keyword, found '%c'\n", + __func__, sep, *rule); + exit(1); + } + + rule++; + + /* At start of 'key' field */ + tmp = strchr(rule, sep); + if (!tmp) { + fuse_log(FUSE_LOG_ERR, + "%s: Missing '%c' at end of key field in map rule\n", + __func__, sep); + exit(1); + } + + key = g_strndup(rule, tmp - rule); + rule = tmp + 1; + + /* At start of prefix field */ + tmp = strchr(rule, sep); + if (!tmp) { + fuse_log(FUSE_LOG_ERR, + "%s: Missing '%c' at end of prefix field in map rule\n", + __func__, sep); + exit(1); + } + + prefix = g_strndup(rule, tmp - rule); + rule = tmp + 1; + + /* + * This should be the end of the string, we don't allow + * any more commands after 'map'. + */ + if (*rule) { + fuse_log(FUSE_LOG_ERR, + "%s: Expecting end of command after map, found '%c'\n", + __func__, *rule); + exit(1); + } + + /* 1st: Prefix matches/everything */ + tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL; + tmp_entry.key = g_strdup(key); + tmp_entry.prepend = g_strdup(prefix); + add_xattrmap_entry(lo, &tmp_entry); + + if (!*key) { + /* Prefix all case */ + + /* 2nd: Hide any non-prefixed entries on the host */ + tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL; + tmp_entry.key = g_strdup(""); + tmp_entry.prepend = g_strdup(""); + add_xattrmap_entry(lo, &tmp_entry); + } else { + /* Prefix matching case */ + + /* 2nd: Hide non-prefixed but matching entries on the host */ + tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER; + tmp_entry.key = g_strdup(""); /* Not used */ + tmp_entry.prepend = g_strdup(key); + add_xattrmap_entry(lo, &tmp_entry); + + /* 3rd: Stop the client accessing prefixed attributes directly */ + tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT; + tmp_entry.key = g_strdup(prefix); + tmp_entry.prepend = g_strdup(""); /* Not used */ + add_xattrmap_entry(lo, &tmp_entry); + + /* 4th: Everything else is OK */ + tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL; + tmp_entry.key = g_strdup(""); + tmp_entry.prepend = g_strdup(""); + add_xattrmap_entry(lo, &tmp_entry); + } + + g_free(key); + g_free(prefix); +} + static void parse_xattrmap(struct lo_data *lo) { const char *map = lo->xattrmap; @@ -2118,10 +2221,17 @@ static void parse_xattrmap(struct lo_data *lo) tmp_entry.flags |= XATTR_MAP_FLAG_OK; } else if (strstart(map, "bad", &map)) { tmp_entry.flags |= XATTR_MAP_FLAG_BAD; + } else if (strstart(map, "map", &map)) { + /* + * map is sugar that adds a number of rules, and must be + * the last entry. + */ + parse_xattrmap_map(lo, map, sep); + return; } else { fuse_log(FUSE_LOG_ERR, "%s: Unexpected type;" - "Expecting 'prefix', 'ok', or 'bad' in rule %zu\n", + "Expecting 'prefix', 'ok', 'bad' or 'map' in rule %zu\n", __func__, lo->xattr_map_nentries); exit(1); } -- cgit v1.2.3-55-g7522 From 97d741cc96dd08b183cb098a38d55b641e277be0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:21 +0200 Subject: linux/fuse.h: Pull in from Linux Update the linux/fuse.h standard header from the kernel development tree that implements FUSE submounts. This adds the fuse_attr.flags field, the FUSE_ATTR_FLAGS INIT flag, and the FUSE_ATTR_SUBMOUNT flag for fuse_attr.flags. Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-2-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- include/standard-headers/linux/fuse.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index f4df0a40f6..7dd7a3b992 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -227,7 +227,7 @@ struct fuse_attr { uint32_t gid; uint32_t rdev; uint32_t blksize; - uint32_t padding; + uint32_t flags; }; struct fuse_kstatfs { @@ -310,6 +310,7 @@ struct fuse_file_lock { * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request * FUSE_MAP_ALIGNMENT: map_alignment field is valid + * FUSE_ATTR_FLAGS: fuse_attr.flags is present and valid */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -338,6 +339,7 @@ struct fuse_file_lock { #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) #define FUSE_MAP_ALIGNMENT (1 << 26) +#define FUSE_ATTR_FLAGS (1 << 27) /** * CUSE INIT request/reply flags @@ -413,6 +415,13 @@ struct fuse_file_lock { */ #define FUSE_FSYNC_FDATASYNC (1 << 0) +/** + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: File/directory is a submount point + */ +#define FUSE_ATTR_SUBMOUNT (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ -- cgit v1.2.3-55-g7522 From 2f10415abfc58e1eb8d311d35e97f78d9fc0f16c Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:22 +0200 Subject: virtiofsd: Announce FUSE_ATTR_FLAGS The fuse_attr.flags field is currently just initialized to 0, which is valid. Thus, there is no reason not to always announce FUSE_ATTR_FLAGS (when the kernel supports it). Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-3-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_common.h | 8 ++++++++ tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h index 686c42c0a5..870544fe13 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h @@ -352,6 +352,14 @@ struct fuse_file_info { */ #define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) +/** + * Indicates that the client will provide fuse_attr.flags, and the kernel will + * interpret it. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_ATTR_FLAGS (1 << 27) + /** * Ioctl flags * diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 4d1ba2925d..8679594255 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -1988,6 +1988,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, bufsize = max_bufsize; } } + if (arg->flags & FUSE_ATTR_FLAGS) { + se->conn.capable |= FUSE_CAP_ATTR_FLAGS; + } #ifdef HAVE_SPLICE #ifdef HAVE_VMSPLICE se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; @@ -2014,6 +2017,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); + LL_SET_DEFAULT(1, FUSE_CAP_ATTR_FLAGS); LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); @@ -2103,6 +2107,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (se->conn.want & FUSE_CAP_POSIX_ACL) { outarg.flags |= FUSE_POSIX_ACL; } + if (se->conn.want & FUSE_CAP_ATTR_FLAGS) { + outarg.flags |= FUSE_ATTR_FLAGS; + } outarg.max_readahead = se->conn.max_readahead; outarg.max_write = se->conn.max_write; if (se->conn.max_background >= (1 << 16)) { -- cgit v1.2.3-55-g7522 From e2577435d3433b66b6f8adcd6482b5da5198689b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:23 +0200 Subject: virtiofsd: Add attr_flags to fuse_entry_param fuse_entry_param is converted to fuse_attr on the line (by fill_entry()), so it should have a member that mirrors fuse_attr.flags. fill_entry() should then copy this fuse_entry_param.attr_flags to fuse_attr.flags. Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-4-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_lowlevel.c | 13 +++++++++---- tools/virtiofsd/fuse_lowlevel.h | 5 +++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 8679594255..00da84432c 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -329,7 +329,8 @@ static unsigned int calc_timeout_nsec(double t) } } -static void fill_entry(struct fuse_entry_out *arg, +static void fill_entry(struct fuse_session *se, + struct fuse_entry_out *arg, const struct fuse_entry_param *e) { *arg = (struct fuse_entry_out){ @@ -341,6 +342,10 @@ static void fill_entry(struct fuse_entry_out *arg, .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), }; convert_stat(&e->attr, &arg->attr); + + if (se->conn.capable & FUSE_CAP_ATTR_FLAGS) { + arg->attr.flags = e->attr_flags; + } } /* @@ -365,7 +370,7 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; memset(&dp->entry_out, 0, sizeof(dp->entry_out)); - fill_entry(&dp->entry_out, e); + fill_entry(req->se, &dp->entry_out, e); struct fuse_dirent *dirent = &dp->dirent; *dirent = (struct fuse_dirent){ @@ -403,7 +408,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) size_t size = sizeof(arg); memset(&arg, 0, sizeof(arg)); - fill_entry(&arg, e); + fill_entry(req->se, &arg, e); return send_reply_ok(req, &arg, size); } @@ -416,7 +421,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); memset(buf, 0, sizeof(buf)); - fill_entry(earg, e); + fill_entry(req->se, earg, e); fill_open(oarg, f); return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); } diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index 562fd5241e..9c06240f9e 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -102,6 +102,11 @@ struct fuse_entry_param { * large value. */ double entry_timeout; + + /** + * Flags for fuse_attr.flags that do not fit into attr. + */ + uint32_t attr_flags; }; /** -- cgit v1.2.3-55-g7522 From ede24b6be798000bbf554891e5b54a0f4848897f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:24 +0200 Subject: virtiofsd: Add fuse_reply_attr_with_flags() The plain fuse_reply_attr() function does not allow setting fuse_attr.flags, so add this new function that does. Make fuse_reply_attr() a wrapper around it. Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-5-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_lowlevel.c | 14 ++++++++++++-- tools/virtiofsd/fuse_lowlevel.h | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 00da84432c..5198f627bc 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -426,8 +426,8 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); } -int fuse_reply_attr(fuse_req_t req, const struct stat *attr, - double attr_timeout) +int fuse_reply_attr_with_flags(fuse_req_t req, const struct stat *attr, + double attr_timeout, uint32_t attr_flags) { struct fuse_attr_out arg; size_t size = sizeof(arg); @@ -437,9 +437,19 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); convert_stat(attr, &arg.attr); + if (req->se->conn.capable & FUSE_CAP_ATTR_FLAGS) { + arg.attr.flags = attr_flags; + } + return send_reply_ok(req, &arg, size); } +int fuse_reply_attr(fuse_req_t req, const struct stat *attr, + double attr_timeout) +{ + return fuse_reply_attr_with_flags(req, attr, attr_timeout, 0); +} + int fuse_reply_readlink(fuse_req_t req, const char *linkname) { return send_reply_ok(req, linkname, strlen(linkname)); diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index 9c06240f9e..1ff6ba1e4f 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -1313,6 +1313,21 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, int fuse_reply_attr(fuse_req_t req, const struct stat *attr, double attr_timeout); +/** + * Reply with attributes and set fuse_attr.flags + * + * Possible requests: + * getattr, setattr + * + * @param req request handle + * @param attr the attributes + * @param attr_timeout validity timeout (in seconds) for the attributes + * @param attr_flags flags to put into fuse_attr.flags + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_attr_with_flags(fuse_req_t req, const struct stat *attr, + double attr_timeout, uint32_t attr_flags); + /** * Reply with the contents of a symbolic link * -- cgit v1.2.3-55-g7522 From eba8b096c17c5a74c5a00af4cf476a9b1b4d7393 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:25 +0200 Subject: virtiofsd: Store every lo_inode's parent_dev We want to detect mount points in the shared tree. We report them to the guest by setting the FUSE_ATTR_SUBMOUNT flag in fuse_attr.flags, but because the FUSE client will create a submount for every directory that has this flag set, we must do this only for the actual mount points. We can detect mount points by comparing a directory's st_dev with its parent's st_dev. To be able to do so, we need to store the parent's st_dev in the lo_inode object. Note that mount points need not necessarily be directories; a single file can be a mount point as well. However, for the sake of simplicity let us ignore any non-directory mount points for now. Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-6-mreitz@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index a0beb986f3..4188bf3ad2 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -125,6 +125,14 @@ struct lo_inode { GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ mode_t filetype; + + /* + * So we can detect crossmount roots + * (As such, this only needs to be valid for directories. Note + * that files can have multiple parents due to hard links, and so + * their parent_dev may fluctuate.) + */ + dev_t parent_dev; }; struct lo_cred { @@ -847,6 +855,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, g_hash_table_insert(lo->inodes, &inode->key, inode); pthread_mutex_unlock(&lo->mutex); } + inode->parent_dev = dir->key.dev; e->ino = inode->fuse_ino; lo_inode_put(lo, &inode); lo_inode_put(lo, &dir); @@ -1073,6 +1082,14 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, name, (unsigned long long)e.ino); + /* + * No need to update inode->parent_dev, because + * (1) We cannot, the inode now has more than one parent, + * (2) Directories cannot have more than one parent, so link() + * does not work for them; but parent_dev only needs to be + * valid for directories. + */ + fuse_reply_entry(req, &e); lo_inode_put(lo, &parent_inode); lo_inode_put(lo, &inode); -- cgit v1.2.3-55-g7522 From 08dce386e77eb9ab044cb118e5391dc9ae11c5a8 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:26 +0200 Subject: virtiofsd: Announce sub-mount points Whenever we encounter a directory with an st_dev that differs from that of its parent, we set the FUSE_ATTR_SUBMOUNT flag so the guest can create a submount for it. Make this behavior optional, so submounts are only announced to the guest with the announce_submounts option. Some users may prefer the current behavior, so that the guest learns nothing about the host mount structure. Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-7-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert Manual merge --- tools/virtiofsd/helper.c | 1 + tools/virtiofsd/passthrough_ll.c | 67 +++++++++++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index 2e181a49b5..a212f32931 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -178,6 +178,7 @@ void fuse_cmdline_help(void) " default: depends on cache= option.\n" " -o writeback|no_writeback enable/disable writeback cache\n" " default: no_writeback\n" + " -o announce_submounts Announce sub-mount points to the guest\n" " -o xattr|no_xattr enable/disable xattr\n" " default: no_xattr\n" " -o modcaps=CAPLIST Modify the list of capabilities\n" diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 4188bf3ad2..4db50046d4 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -40,6 +40,7 @@ #include "fuse_virtio.h" #include "fuse_log.h" #include "fuse_lowlevel.h" +#include "standard-headers/linux/fuse.h" #include #include #include @@ -173,6 +174,7 @@ struct lo_data { int timeout_set; int readdirplus_set; int readdirplus_clear; + int announce_submounts; int allow_direct_io; struct lo_inode root; GHashTable *inodes; /* protected by lo->mutex */ @@ -211,6 +213,7 @@ static const struct fuse_opt lo_opts[] = { { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 }, { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, FUSE_OPT_END @@ -608,22 +611,52 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) } } +/** + * Call fstatat() and set st_rdev whenever a directory's st_dev + * differs from the rparent's st_dev (@parent_dev). This will + * announce submounts to the FUSE client (unless @announce_submounts + * is false). + */ +static int do_fstatat(int dirfd, const char *pathname, struct stat *statbuf, + int flags, dev_t parent_dev, uint32_t *fuse_attr_flags) +{ + int res = fstatat(dirfd, pathname, statbuf, flags); + if (res == -1) { + return res; + } + + if (statbuf->st_dev != parent_dev && S_ISDIR(statbuf->st_mode) && + fuse_attr_flags) + { + *fuse_attr_flags |= FUSE_ATTR_SUBMOUNT; + } + + return 0; +} + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { int res; struct stat buf; struct lo_data *lo = lo_data(req); + struct lo_inode *inode = lo_inode(req, ino); + uint32_t fuse_attr_flags = 0; (void)fi; - res = - fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_fstatat(inode->fd, "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + inode->parent_dev, &fuse_attr_flags); + lo_inode_put(lo, &inode); if (res == -1) { return (void)fuse_reply_err(req, errno); } - fuse_reply_attr(req, &buf, lo->timeout); + if (!lo->announce_submounts) { + fuse_attr_flags &= ~FUSE_ATTR_SUBMOUNT; + } + + fuse_reply_attr_with_flags(req, &buf, lo->timeout, fuse_attr_flags); } static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) @@ -819,11 +852,16 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, goto out_err; } - res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + dir->key.dev, &e->attr_flags); if (res == -1) { goto out_err; } + if (!lo->announce_submounts) { + e->attr_flags &= ~FUSE_ATTR_SUBMOUNT; + } + inode = lo_find(lo, &e->attr); if (inode) { close(newfd); @@ -1069,11 +1107,17 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, goto out_err; } - res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_fstatat(inode->fd, "", &e.attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + parent_inode->key.dev, &e.attr_flags); if (res == -1) { goto out_err; } + if (!lo->announce_submounts) { + e.attr_flags &= ~FUSE_ATTR_SUBMOUNT; + } + pthread_mutex_lock(&lo->mutex); inode->nlookup++; pthread_mutex_unlock(&lo->mutex); @@ -1108,14 +1152,21 @@ static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, { int res; struct stat attr; + struct lo_data *lo = lo_data(req); + struct lo_inode *dir = lo_inode(req, parent); + + if (!dir) { + return NULL; + } - res = fstatat(lo_fd(req, parent), name, &attr, - AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_fstatat(dir->fd, name, &attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, dir->key.dev, NULL); + lo_inode_put(lo, &dir); if (res == -1) { return NULL; } - return lo_find(lo_data(req), &attr); + return lo_find(lo, &attr); } static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) -- cgit v1.2.3-55-g7522 From 45ced7ca2f2767c046fb1b9980ba938ad526262e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:27 +0200 Subject: tests/acceptance/boot_linux: Accept SSH pubkey Let download_cloudinit() take an optional pubkey, which subclasses of BootLinux can pass through setUp(). Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-8-mreitz@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: WIllian Rampazzo Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tests/acceptance/boot_linux.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py index 0055dc7cee..ad997c3f2e 100644 --- a/tests/acceptance/boot_linux.py +++ b/tests/acceptance/boot_linux.py @@ -57,7 +57,7 @@ class BootLinuxBase(Test): self.cancel('Failed to download/prepare boot image') return boot.path - def download_cloudinit(self): + def download_cloudinit(self, ssh_pubkey=None): self.log.info('Preparing cloudinit image') try: cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso') @@ -67,7 +67,8 @@ class BootLinuxBase(Test): password='password', # QEMU's hard coded usermode router address phone_home_host='10.0.2.2', - phone_home_port=self.phone_home_port) + phone_home_port=self.phone_home_port, + authorized_key=ssh_pubkey) except Exception: self.cancel('Failed to prepared cloudinit image') return cloudinit_iso @@ -80,19 +81,19 @@ class BootLinux(BootLinuxBase): timeout = 900 chksum = None - def setUp(self): + def setUp(self, ssh_pubkey=None): super(BootLinux, self).setUp() self.vm.add_args('-smp', '2') self.vm.add_args('-m', '1024') self.prepare_boot() - self.prepare_cloudinit() + self.prepare_cloudinit(ssh_pubkey) def prepare_boot(self): path = self.download_boot() self.vm.add_args('-drive', 'file=%s' % path) - def prepare_cloudinit(self): - cloudinit_iso = self.download_cloudinit() + def prepare_cloudinit(self, ssh_pubkey=None): + cloudinit_iso = self.download_cloudinit(ssh_pubkey) self.vm.add_args('-drive', 'file=%s,format=raw' % cloudinit_iso) def launch_and_wait(self): -- cgit v1.2.3-55-g7522 From c93a656f7b6585d9b6f8639d1fbb74dd944be239 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 9 Sep 2020 20:40:28 +0200 Subject: tests/acceptance: Add virtiofs_submounts.py This test invokes several shell scripts to create a random directory tree full of submounts, and then check in the VM whether every submount has its own ID and the structure looks as expected. (Note that the test scripts must be non-executable, so Avocado will not try to execute them as if they were tests on their own, too.) Because at this commit's date it is unlikely that the Linux kernel on the image provided by boot_linux.py supports submounts in virtio-fs, the test will be cancelled if no custom Linux binary is provided through the vmlinuz parameter. (The on-image kernel can be used by providing an empty string via vmlinuz=.) So, invoking the test can be done as follows: $ avocado run \ tests/acceptance/virtiofs_submounts.py \ -p vmlinuz=/path/to/linux/build/arch/x86/boot/bzImage This test requires root privileges (through passwordless sudo -n), because at this point, virtiofsd requires them. (If you have a timestamp_timeout period for sudoers (e.g. the default of 5 min), you can provide this by executing something like "sudo true" before invoking Avocado.) Signed-off-by: Max Reitz Message-Id: <20200909184028.262297-9-mreitz@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tests/acceptance/virtiofs_submounts.py | 289 +++++++++++++++++++++ .../virtiofs_submounts.py.data/cleanup.sh | 46 ++++ .../virtiofs_submounts.py.data/guest-cleanup.sh | 30 +++ .../acceptance/virtiofs_submounts.py.data/guest.sh | 138 ++++++++++ .../acceptance/virtiofs_submounts.py.data/host.sh | 127 +++++++++ 5 files changed, 630 insertions(+) create mode 100644 tests/acceptance/virtiofs_submounts.py create mode 100644 tests/acceptance/virtiofs_submounts.py.data/cleanup.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/guest.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/host.sh diff --git a/tests/acceptance/virtiofs_submounts.py b/tests/acceptance/virtiofs_submounts.py new file mode 100644 index 0000000000..8b207b3e57 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py @@ -0,0 +1,289 @@ +import logging +import re +import os +import subprocess +import time + +from avocado import skipUnless +from avocado_qemu import Test, BUILD_DIR +from avocado_qemu import wait_for_console_pattern +from avocado.utils import ssh + +from qemu.accel import kvm_available + +from boot_linux import BootLinux + + +def run_cmd(args): + subp = subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + stdout, stderr = subp.communicate() + ret = subp.returncode + + return (stdout, stderr, ret) + +def has_passwordless_sudo(): + """ + This function is for use in a @avocado.skipUnless decorator, e.g.: + + @skipUnless(*has_passwordless_sudo()) + def test_something_that_needs_sudo(self): + ... + """ + + _, stderr, exitcode = run_cmd(('sudo', '-n', 'true')) + if exitcode != 0: + return (False, f'Failed to use sudo -n: {stderr.strip()}') + else: + return (True, '') + + +class VirtiofsSubmountsTest(BootLinux): + """ + :avocado: tags=arch:x86_64 + """ + + def get_portfwd(self): + port = None + + res = self.vm.command('human-monitor-command', + command_line='info usernet') + for line in res.split('\r\n'): + match = \ + re.search(r'TCP.HOST_FORWARD.*127\.0\.0\.1\s*(\d+)\s+10\.', + line) + if match is not None: + port = match[1] + break + + self.assertIsNotNone(port) + self.log.debug('sshd listening on port: ' + port) + return port + + def ssh_connect(self, username, keyfile): + self.ssh_logger = logging.getLogger('ssh') + port = self.get_portfwd() + self.ssh_session = ssh.Session('127.0.0.1', port=int(port), + user=username, key=keyfile) + for i in range(10): + try: + self.ssh_session.connect() + return + except: + time.sleep(4) + pass + self.fail('sshd timeout') + + def ssh_command(self, command): + self.ssh_logger.info(command) + result = self.ssh_session.cmd(command) + stdout_lines = [line.rstrip() for line + in result.stdout_text.splitlines()] + for line in stdout_lines: + self.ssh_logger.info(line) + stderr_lines = [line.rstrip() for line + in result.stderr_text.splitlines()] + for line in stderr_lines: + self.ssh_logger.warning(line) + + self.assertEqual(result.exit_status, 0, + f'Guest command failed: {command}') + return stdout_lines, stderr_lines + + def run(self, args, ignore_error=False): + stdout, stderr, ret = run_cmd(args) + + if ret != 0: + cmdline = ' '.join(args) + if not ignore_error: + self.fail(f'{cmdline}: Returned {ret}: {stderr}') + else: + self.log.warn(f'{cmdline}: Returned {ret}: {stderr}') + + return (stdout, stderr, ret) + + def set_up_shared_dir(self): + atwd = os.getenv('AVOCADO_TEST_WORKDIR') + self.shared_dir = os.path.join(atwd, 'virtiofs-shared') + + os.mkdir(self.shared_dir) + + self.run(('cp', self.get_data('guest.sh'), + os.path.join(self.shared_dir, 'check.sh'))) + + self.run(('cp', self.get_data('guest-cleanup.sh'), + os.path.join(self.shared_dir, 'cleanup.sh'))) + + def set_up_virtiofs(self): + attmp = os.getenv('AVOCADO_TESTS_COMMON_TMPDIR') + self.vfsdsock = os.path.join(attmp, 'vfsdsock') + + self.run(('sudo', '-n', 'rm', '-f', self.vfsdsock), ignore_error=True) + + self.virtiofsd = \ + subprocess.Popen(('sudo', '-n', + 'tools/virtiofsd/virtiofsd', + f'--socket-path={self.vfsdsock}', + '-o', f'source={self.shared_dir}', + '-o', 'cache=always', + '-o', 'xattr', + '-o', 'announce_submounts', + '-f'), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + universal_newlines=True) + + while not os.path.exists(self.vfsdsock): + if self.virtiofsd.poll() is not None: + self.fail('virtiofsd exited prematurely: ' + + self.virtiofsd.communicate()[1]) + time.sleep(0.1) + + self.run(('sudo', '-n', 'chmod', 'go+rw', self.vfsdsock)) + + self.vm.add_args('-chardev', + f'socket,id=vfsdsock,path={self.vfsdsock}', + '-device', + 'vhost-user-fs-pci,queue-size=1024,chardev=vfsdsock' \ + ',tag=host', + '-object', + 'memory-backend-file,id=mem,size=1G,' \ + 'mem-path=/dev/shm,share=on', + '-numa', + 'node,memdev=mem') + + def launch_vm(self): + self.launch_and_wait() + self.ssh_connect('root', self.ssh_key) + + def set_up_nested_mounts(self): + scratch_dir = os.path.join(self.shared_dir, 'scratch') + try: + os.mkdir(scratch_dir) + except FileExistsError: + pass + + args = ['bash', self.get_data('host.sh'), scratch_dir] + if self.seed: + args += [self.seed] + + out, _, _ = self.run(args) + seed = re.search(r'^Seed: \d+', out) + self.log.info(seed[0]) + + def mount_in_guest(self): + self.ssh_command('mkdir -p /mnt/host') + self.ssh_command('mount -t virtiofs host /mnt/host') + + def check_in_guest(self): + self.ssh_command('bash /mnt/host/check.sh /mnt/host/scratch/share') + + def live_cleanup(self): + self.ssh_command('bash /mnt/host/cleanup.sh /mnt/host/scratch') + + # It would be nice if the above was sufficient to make virtiofsd clear + # all references to the mounted directories (so they can be unmounted + # on the host), but unfortunately it is not. To do so, we have to + # resort to a remount. + self.ssh_command('mount -o remount /mnt/host') + + scratch_dir = os.path.join(self.shared_dir, 'scratch') + self.run(('bash', self.get_data('cleanup.sh'), scratch_dir)) + + @skipUnless(*has_passwordless_sudo()) + def setUp(self): + vmlinuz = self.params.get('vmlinuz') + if vmlinuz is None: + self.cancel('vmlinuz parameter not set; you must point it to a ' + 'Linux kernel binary to test (to run this test with ' \ + 'the on-image kernel, set it to an empty string)') + + self.seed = self.params.get('seed') + + atwd = os.getenv('AVOCADO_TEST_WORKDIR') + self.ssh_key = os.path.join(atwd, 'id_ed25519') + + self.run(('ssh-keygen', '-t', 'ed25519', '-f', self.ssh_key)) + + pubkey = open(self.ssh_key + '.pub').read() + + super(VirtiofsSubmountsTest, self).setUp(pubkey) + + if len(vmlinuz) > 0: + self.vm.add_args('-kernel', vmlinuz, + '-append', 'console=ttyS0 root=/dev/sda1') + + # Allow us to connect to SSH + self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22', + '-device', 'e1000,netdev=vnet') + + if not kvm_available(self.arch, self.qemu_bin): + self.cancel(KVM_NOT_AVAILABLE) + self.vm.add_args('-accel', 'kvm') + + def tearDown(self): + try: + self.vm.shutdown() + except: + pass + + scratch_dir = os.path.join(self.shared_dir, 'scratch') + self.run(('bash', self.get_data('cleanup.sh'), scratch_dir), + ignore_error=True) + + def test_pre_virtiofsd_set_up(self): + self.set_up_shared_dir() + + self.set_up_nested_mounts() + + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + def test_pre_launch_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + + self.set_up_nested_mounts() + + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + def test_post_launch_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + self.launch_vm() + + self.set_up_nested_mounts() + + self.mount_in_guest() + self.check_in_guest() + + def test_post_mount_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + + self.set_up_nested_mounts() + + self.check_in_guest() + + def test_two_runs(self): + self.set_up_shared_dir() + + self.set_up_nested_mounts() + + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + self.live_cleanup() + self.set_up_nested_mounts() + + self.check_in_guest() diff --git a/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh b/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh new file mode 100644 index 0000000000..2a6579a0fe --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'Scratch dir not given' >&2 + exit 1 +fi + +cd "$scratch_dir/share" || exit 1 +mps=(mnt*) +mp_i=0 +for mp in "${mps[@]}"; do + mp_i=$((mp_i + 1)) + printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}" + + sudo umount -R "$mp" + rm -rf "$mp" +done +echo + +rm some-file +cd .. +rmdir share + +imgs=(fs*.img) +img_i=0 +for img in "${imgs[@]}"; do + img_i=$((img_i + 1)) + printf "Detaching and deleting %i/%i...\r" "$img_i" "${#imgs[@]}" + + dev=$(losetup -j "$img" | sed -e 's/:.*//') + sudo losetup -d "$dev" + rm -f "$img" +done +echo + +echo 'Done.' diff --git a/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh b/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh new file mode 100644 index 0000000000..729cb2d1a5 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'Scratch dir not given' >&2 + exit 1 +fi + +cd "$scratch_dir/share" || exit 1 + +mps=(mnt*) +mp_i=0 +for mp in "${mps[@]}"; do + mp_i=$((mp_i + 1)) + printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}" + + sudo umount -R "$mp" +done +echo + +echo 'Done.' diff --git a/tests/acceptance/virtiofs_submounts.py.data/guest.sh b/tests/acceptance/virtiofs_submounts.py.data/guest.sh new file mode 100644 index 0000000000..59ba40fde1 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/guest.sh @@ -0,0 +1,138 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " + echo '(The shared directory is the "share" directory in the scratch' \ + 'directory)' +} + +shared_dir=$1 +if [ -z "$shared_dir" ]; then + print_usage "$0" 'Shared dir not given' >&2 + exit 1 +fi + +cd "$shared_dir" + +# FIXME: This should not be necessary, but it is. In order for all +# submounts to be proper mount points, we need to visit them. +# (Before we visit them, they will not be auto-mounted, and so just +# appear as normal directories, with the catch that their st_ino will +# be the st_ino of the filesystem they host, while the st_dev will +# still be the st_dev of the parent.) +# `find` does not work, because it will refuse to touch the mount +# points as long as they are not mounted; their st_dev being shared +# with the parent and st_ino just being the root node's inode ID +# will practically ensure that this node exists elsewhere on the +# filesystem, and `find` is required to recognize loops and not to +# follow them. +# Thus, we have to manually visit all nodes first. + +mnt_i=0 + +function recursively_visit() +{ + pushd "$1" >/dev/null + for entry in *; do + if [[ "$entry" == mnt* ]]; then + mnt_i=$((mnt_i + 1)) + printf "Triggering auto-mount $mnt_i...\r" + fi + + if [ -d "$entry" ]; then + recursively_visit "$entry" + fi + done + popd >/dev/null +} + +recursively_visit . +echo + + +if [ -n "$(find -name not-mounted)" ]; then + echo "Error: not-mounted files visible on mount points:" >&2 + find -name not-mounted >&2 + exit 1 +fi + +if [ ! -f some-file -o "$(cat some-file)" != 'root' ]; then + echo "Error: Bad file in the share root" >&2 + exit 1 +fi + +shopt -s nullglob + +function check_submounts() +{ + local base_path=$1 + + for mp in mnt*; do + printf "Checking submount %i...\r" "$((${#devs[@]} + 1))" + + mp_i=$(echo "$mp" | sed -e 's/mnt//') + dev=$(stat -c '%D' "$mp") + + if [ -n "${devs[mp_i]}" ]; then + echo "Error: $mp encountered twice" >&2 + exit 1 + fi + devs[mp_i]=$dev + + pushd "$mp" >/dev/null + path="$base_path$mp" + while true; do + expected_content="$(printf '%s\n%s\n' "$mp_i" "$path")" + if [ ! -f some-file ]; then + echo "Error: $PWD/some-file does not exist" >&2 + exit 1 + fi + + if [ "$(cat some-file)" != "$expected_content" ]; then + echo "Error: Bad content in $PWD/some-file:" >&2 + echo '--- found ---' + cat some-file + echo '--- expected ---' + echo "$expected_content" + exit 1 + fi + if [ "$(stat -c '%D' some-file)" != "$dev" ]; then + echo "Error: $PWD/some-file has the wrong device ID" >&2 + exit 1 + fi + + if [ -d sub ]; then + if [ "$(stat -c '%D' sub)" != "$dev" ]; then + echo "Error: $PWD/some-file has the wrong device ID" >&2 + exit 1 + fi + cd sub + path="$path/sub" + else + if [ -n "$(echo mnt*)" ]; then + check_submounts "$path/" + fi + break + fi + done + popd >/dev/null + done +} + +root_dev=$(stat -c '%D' some-file) +devs=() +check_submounts '' +echo + +reused_devs=$(echo "$root_dev ${devs[@]}" | tr ' ' '\n' | sort | uniq -d) +if [ -n "$reused_devs" ]; then + echo "Error: Reused device IDs: $reused_devs" >&2 + exit 1 +fi + +echo "Test passed for ${#devs[@]} submounts." diff --git a/tests/acceptance/virtiofs_submounts.py.data/host.sh b/tests/acceptance/virtiofs_submounts.py.data/host.sh new file mode 100644 index 0000000000..d8a9afebdb --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/host.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +mount_count=128 + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 [seed]" + echo "(If no seed is given, it will be randomly generated.)" +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'No scratch dir given' >&2 + exit 1 +fi + +if [ ! -d "$scratch_dir" ]; then + print_usage "$0" "$scratch_dir is not a directory" >&2 + exit 1 +fi + +seed=$2 +if [ -z "$seed" ]; then + seed=$RANDOM +fi +RANDOM=$seed + +echo "Seed: $seed" + +set -e +shopt -s nullglob + +cd "$scratch_dir" +if [ -d share ]; then + echo 'Error: This directory seems to be in use already' >&2 + exit 1 +fi + +for ((i = 0; i < $mount_count; i++)); do + printf "Setting up fs %i/%i...\r" "$((i + 1))" "$mount_count" + + rm -f fs$i.img + truncate -s 512M fs$i.img + mkfs.xfs -q fs$i.img + devs[i]=$(sudo losetup -f --show fs$i.img) +done +echo + +top_level_mounts=$((RANDOM % mount_count + 1)) + +mkdir -p share +echo 'root' > share/some-file + +for ((i = 0; i < $top_level_mounts; i++)); do + printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count" + + mkdir -p share/mnt$i + touch share/mnt$i/not-mounted + sudo mount "${devs[i]}" share/mnt$i + sudo chown "$(id -u):$(id -g)" share/mnt$i + + pushd share/mnt$i >/dev/null + path=mnt$i + nesting=$((RANDOM % 4)) + for ((j = 0; j < $nesting; j++)); do + cat > some-file < some-file </dev/null +done + +for ((; i < $mount_count; i++)); do + printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count" + + mp_i=$((i % top_level_mounts)) + + pushd share/mnt$mp_i >/dev/null + path=mnt$mp_i + while true; do + sub_mp="$(echo mnt*)" + if cd sub 2>/dev/null; then + path="$path/sub" + elif [ -n "$sub_mp" ] && cd "$sub_mp" 2>/dev/null; then + path="$path/$sub_mp" + else + break + fi + done + mkdir mnt$i + touch mnt$i/not-mounted + sudo mount "${devs[i]}" mnt$i + sudo chown "$(id -u):$(id -g)" mnt$i + + cd mnt$i + path="$path/mnt$i" + nesting=$((RANDOM % 4)) + for ((j = 0; j < $nesting; j++)); do + cat > some-file < some-file </dev/null +done +echo + +echo 'Done.' -- cgit v1.2.3-55-g7522