summary | refs | log | tree | commit | diff | stats
path: root/block
diff options
context:
space:
mode:
author: Peter Maydell, 2019-07-01 12:28:28 +0200
committer: Peter Maydell, 2019-07-01 12:28:28 +0200
commit: 7fec76a02267598a4e437ddfdaeaeb6de09b92f3 (patch)
tree: 338c85fea4651c80e9a7d681ec78ce0d87c9c65f /block
parent: Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' ... (diff)
parent: iotests: Fix 205 for concurrent runs (diff)
download: qemu-7fec76a02267598a4e437ddfdaeaeb6de09b92f3.tar.gz
download: qemu-7fec76a02267598a4e437ddfdaeaeb6de09b92f3.tar.xz
download: qemu-7fec76a02267598a4e437ddfdaeaeb6de09b92f3.zip
Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-06-24' into staging
Block patches:
- The SSH block driver now uses libssh instead of libssh2
- The VMDK block driver gets read-only support for the seSparse subformat
- Various fixes

# gpg: Signature made Mon 24 Jun 2019 15:42:56 BST
# gpg: using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg: issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-06-24:
  iotests: Fix 205 for concurrent runs
  ssh: switch from libssh2 to libssh
  vmdk: Add read-only support for seSparse snapshots
  vmdk: Reduce the max bound for L1 table size
  vmdk: Fix comment regarding max l1_size coverage
  iotest 134: test cluster-misaligned encrypted write
  blockdev: enable non-root nodes for transaction drive-backup source
  nvme: do not advertise support for unsupported arbitration mechanism

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- block/Makefile.objs 6
-rw-r--r-- block/ssh.c 652
-rw-r--r-- block/trace-events 14
-rw-r--r-- block/vmdk.c 372
4 files changed, 723 insertions, 321 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index dbd1522722..35f3bca4d9 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_VXHS) += vxhs.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-$(CONFIG_LIBSSH) += ssh.o
block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
@@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
vxhs.o-libs := $(VXHS_LIBS)
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
-ssh.o-libs := $(LIBSSH2_LIBS)
+ssh.o-cflags := $(LIBSSH_CFLAGS)
+ssh.o-libs := $(LIBSSH_LIBS)
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
dmg-bz2.o-libs := $(BZIP2_LIBS)
diff --git a/block/ssh.c b/block/ssh.c
index 6da7b9cbfe..501933b855 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -24,8 +24,8 @@
#include "qemu/osdep.h"
-#include <libssh2.h>
-#include <libssh2_sftp.h>
+#include <libssh/libssh.h>
+#include <libssh/sftp.h>
#include "block/block_int.h"
#include "block/qdict.h"
@@ -46,13 +46,11 @@
#include "trace.h"
/*
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself. The meaning of <bitmask> is described
- * here: http://www.libssh2.org/libssh2_trace.html
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
+ * The meaning of <level> is described here:
+ * http://api.libssh.org/master/group__libssh__log.html
*/
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
typedef struct BDRVSSHState {
/* Coroutine. */
@@ -60,18 +58,15 @@ typedef struct BDRVSSHState {
/* SSH connection. */
int sock; /* socket */
- LIBSSH2_SESSION *session; /* ssh session */
- LIBSSH2_SFTP *sftp; /* sftp session */
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
+ ssh_session session; /* ssh session */
+ sftp_session sftp; /* sftp session */
+ sftp_file sftp_handle; /* sftp remote file handle */
- /* See ssh_seek() function below. */
- int64_t offset;
- bool offset_op_read;
-
- /* File attributes at open. We try to keep the .filesize field
+ /*
+ * File attributes at open. We try to keep the .size field
* updated if it changes (eg by writing at the end of the file).
*/
- LIBSSH2_SFTP_ATTRIBUTES attrs;
+ sftp_attributes attrs;
InetSocketAddress *inet;
@@ -91,7 +86,6 @@ static void ssh_state_init(BDRVSSHState *s)
{
memset(s, 0, sizeof *s);
s->sock = -1;
- s->offset = -1;
qemu_co_mutex_init(&s->lock);
}
@@ -99,20 +93,18 @@ static void ssh_state_free(BDRVSSHState *s)
{
g_free(s->user);
+ if (s->attrs) {
+ sftp_attributes_free(s->attrs);
+ }
if (s->sftp_handle) {
- libssh2_sftp_close(s->sftp_handle);
+ sftp_close(s->sftp_handle);
}
if (s->sftp) {
- libssh2_sftp_shutdown(s->sftp);
+ sftp_free(s->sftp);
}
if (s->session) {
- libssh2_session_disconnect(s->session,
- "from qemu ssh client: "
- "user closed the connection");
- libssh2_session_free(s->session);
- }
- if (s->sock >= 0) {
- close(s->sock);
+ ssh_disconnect(s->session);
+ ssh_free(s->session); /* This frees s->sock */
}
}
@@ -127,13 +119,13 @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
va_end(args);
if (s->session) {
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ error_setg(errp, "%s: %s (libssh error code: %d)",
msg, ssh_err, ssh_err_code);
} else {
error_setg(errp, "%s", msg);
@@ -152,18 +144,18 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
va_end(args);
if (s->sftp) {
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- unsigned long sftp_err_code;
+ int sftp_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- /* See <libssh2_sftp.h>. */
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ /* See <libssh/sftp.h>. */
+ sftp_err_code = sftp_get_error(s->sftp);
error_setg(errp,
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
msg, ssh_err, ssh_err_code, sftp_err_code);
} else {
error_setg(errp, "%s", msg);
@@ -173,15 +165,15 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
static void sftp_error_trace(BDRVSSHState *s, const char *op)
{
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- unsigned long sftp_err_code;
+ int sftp_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- /* See <libssh2_sftp.h>. */
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ /* See <libssh/sftp.h>. */
+ sftp_err_code = sftp_get_error(s->sftp);
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
}
@@ -282,82 +274,120 @@ static void ssh_parse_filename(const char *filename, QDict *options,
parse_uri(filename, options, errp);
}
-static int check_host_key_knownhosts(BDRVSSHState *s,
- const char *host, int port, Error **errp)
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
{
- const char *home;
- char *knh_file = NULL;
- LIBSSH2_KNOWNHOSTS *knh = NULL;
- struct libssh2_knownhost *found;
- int ret, r;
- const char *hostkey;
- size_t len;
- int type;
-
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
- if (!hostkey) {
+ int ret;
+#ifdef HAVE_LIBSSH_0_8
+ enum ssh_known_hosts_e state;
+ int r;
+ ssh_key pubkey;
+ enum ssh_keytypes_e pubkey_type;
+ unsigned char *server_hash = NULL;
+ size_t server_hash_len;
+ char *fingerprint = NULL;
+
+ state = ssh_session_is_known_server(s->session);
+ trace_ssh_server_status(state);
+
+ switch (state) {
+ case SSH_KNOWN_HOSTS_OK:
+ /* OK */
+ trace_ssh_check_host_key_knownhosts();
+ break;
+ case SSH_KNOWN_HOSTS_CHANGED:
ret = -EINVAL;
- session_error_setg(errp, s, "failed to read remote host key");
+ r = ssh_get_server_publickey(s->session, &pubkey);
+ if (r == 0) {
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
+ &server_hash, &server_hash_len);
+ pubkey_type = ssh_key_type(pubkey);
+ ssh_key_free(pubkey);
+ }
+ if (r == 0) {
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
+ server_hash,
+ server_hash_len);
+ ssh_clean_pubkey_hash(&server_hash);
+ }
+ if (fingerprint) {
+ error_setg(errp,
+ "host key (%s key with fingerprint %s) does not match "
+ "the one in known_hosts; this may be a possible attack",
+ ssh_key_type_to_char(pubkey_type), fingerprint);
+ ssh_string_free_char(fingerprint);
+ } else {
+ error_setg(errp,
+ "host key does not match the one in known_hosts; this "
+ "may be a possible attack");
+ }
goto out;
- }
-
- knh = libssh2_knownhost_init(s->session);
- if (!knh) {
+ case SSH_KNOWN_HOSTS_OTHER:
ret = -EINVAL;
- session_error_setg(errp, s,
- "failed to initialize known hosts support");
+ error_setg(errp,
+ "host key for this server not found, another type exists");
+ goto out;
+ case SSH_KNOWN_HOSTS_UNKNOWN:
+ ret = -EINVAL;
+ error_setg(errp, "no host key was found in known_hosts");
+ goto out;
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
+ ret = -ENOENT;
+ error_setg(errp, "known_hosts file not found");
+ goto out;
+ case SSH_KNOWN_HOSTS_ERROR:
+ ret = -EINVAL;
+ error_setg(errp, "error while checking the host");
+ goto out;
+ default:
+ ret = -EINVAL;
+ error_setg(errp, "error while checking for known server (%d)", state);
goto out;
}
+#else /* !HAVE_LIBSSH_0_8 */
+ int state;
- home = getenv("HOME");
- if (home) {
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
- } else {
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
- }
-
- /* Read all known hosts from OpenSSH-style known_hosts file. */
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
+ state = ssh_is_server_known(s->session);
+ trace_ssh_server_status(state);
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
- &found);
- switch (r) {
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
+ switch (state) {
+ case SSH_SERVER_KNOWN_OK:
/* OK */
- trace_ssh_check_host_key_knownhosts(found->key);
+ trace_ssh_check_host_key_knownhosts();
break;
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
+ case SSH_SERVER_KNOWN_CHANGED:
ret = -EINVAL;
- session_error_setg(errp, s,
- "host key does not match the one in known_hosts"
- " (found key %s)", found->key);
+ error_setg(errp,
+ "host key does not match the one in known_hosts; this "
+ "may be a possible attack");
goto out;
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
+ case SSH_SERVER_FOUND_OTHER:
ret = -EINVAL;
- session_error_setg(errp, s, "no host key was found in known_hosts");
+ error_setg(errp,
+ "host key for this server not found, another type exists");
+ goto out;
+ case SSH_SERVER_FILE_NOT_FOUND:
+ ret = -ENOENT;
+ error_setg(errp, "known_hosts file not found");
goto out;
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
+ case SSH_SERVER_NOT_KNOWN:
ret = -EINVAL;
- session_error_setg(errp, s,
- "failure matching the host key with known_hosts");
+ error_setg(errp, "no host key was found in known_hosts");
+ goto out;
+ case SSH_SERVER_ERROR:
+ ret = -EINVAL;
+ error_setg(errp, "server error");
goto out;
default:
ret = -EINVAL;
- session_error_setg(errp, s, "unknown error matching the host key"
- " with known_hosts (%d)", r);
+ error_setg(errp, "error while checking for known server (%d)", state);
goto out;
}
+#endif /* !HAVE_LIBSSH_0_8 */
/* known_hosts checking successful. */
ret = 0;
out:
- if (knh != NULL) {
- libssh2_knownhost_free(knh);
- }
- g_free(knh_file);
return ret;
}
@@ -401,18 +431,34 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
static int
check_host_key_hash(BDRVSSHState *s, const char *hash,
- int hash_type, size_t fingerprint_len, Error **errp)
+ enum ssh_publickey_hash_type type, Error **errp)
{
- const char *fingerprint;
-
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
- if (!fingerprint) {
+ int r;
+ ssh_key pubkey;
+ unsigned char *server_hash;
+ size_t server_hash_len;
+
+#ifdef HAVE_LIBSSH_0_8
+ r = ssh_get_server_publickey(s->session, &pubkey);
+#else
+ r = ssh_get_publickey(s->session, &pubkey);
+#endif
+ if (r != SSH_OK) {
session_error_setg(errp, s, "failed to read remote host key");
return -EINVAL;
}
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
- hash) != 0) {
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
+ ssh_key_free(pubkey);
+ if (r != 0) {
+ session_error_setg(errp, s,
+ "failed reading the hash of the server SSH key");
+ return -EINVAL;
+ }
+
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
+ ssh_clean_pubkey_hash(&server_hash);
+ if (r != 0) {
error_setg(errp, "remote host key does not match host_key_check '%s'",
hash);
return -EPERM;
@@ -421,8 +467,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
return 0;
}
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
- SshHostKeyCheck *hkc, Error **errp)
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
{
SshHostKeyCheckMode mode;
@@ -438,15 +483,15 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
case SSH_HOST_KEY_CHECK_MODE_HASH:
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
return check_host_key_hash(s, hkc->u.hash.hash,
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
+ SSH_PUBLICKEY_HASH_MD5, errp);
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
return check_host_key_hash(s, hkc->u.hash.hash,
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
+ SSH_PUBLICKEY_HASH_SHA1, errp);
}
g_assert_not_reached();
break;
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
- return check_host_key_knownhosts(s, host, port, errp);
+ return check_host_key_knownhosts(s, errp);
default:
g_assert_not_reached();
}
@@ -454,60 +499,43 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
return -EINVAL;
}
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
+static int authenticate(BDRVSSHState *s, Error **errp)
{
int r, ret;
- const char *userauthlist;
- LIBSSH2_AGENT *agent = NULL;
- struct libssh2_agent_publickey *identity;
- struct libssh2_agent_publickey *prev_identity = NULL;
+ int method;
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
- if (strstr(userauthlist, "publickey") == NULL) {
+ /* Try to authenticate with the "none" method. */
+ r = ssh_userauth_none(s->session, NULL);
+ if (r == SSH_AUTH_ERROR) {
ret = -EPERM;
- error_setg(errp,
- "remote server does not support \"publickey\" authentication");
+ session_error_setg(errp, s, "failed to authenticate using none "
+ "authentication");
goto out;
- }
-
- /* Connect to ssh-agent and try each identity in turn. */
- agent = libssh2_agent_init(s->session);
- if (!agent) {
- ret = -EINVAL;
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
- goto out;
- }
- if (libssh2_agent_connect(agent)) {
- ret = -ECONNREFUSED;
- session_error_setg(errp, s, "failed to connect to ssh-agent");
- goto out;
- }
- if (libssh2_agent_list_identities(agent)) {
- ret = -EINVAL;
- session_error_setg(errp, s,
- "failed requesting identities from ssh-agent");
+ } else if (r == SSH_AUTH_SUCCESS) {
+ /* Authenticated! */
+ ret = 0;
goto out;
}
- for(;;) {
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
- if (r == 1) { /* end of list */
- break;
- }
- if (r < 0) {
+ method = ssh_userauth_list(s->session, NULL);
+ trace_ssh_auth_methods(method);
+
+ /*
+ * Try to authenticate with publickey, using the ssh-agent
+ * if available.
+ */
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
+ if (r == SSH_AUTH_ERROR) {
ret = -EINVAL;
- session_error_setg(errp, s,
- "failed to obtain identity from ssh-agent");
+ session_error_setg(errp, s, "failed to authenticate using "
+ "publickey authentication");
goto out;
- }
- r = libssh2_agent_userauth(agent, user, identity);
- if (r == 0) {
+ } else if (r == SSH_AUTH_SUCCESS) {
/* Authenticated! */
ret = 0;
goto out;
}
- /* Failed to authenticate with this identity, try the next one. */
- prev_identity = identity;
}
ret = -EPERM;
@@ -515,13 +543,6 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
"and the identities held by your ssh-agent");
out:
- if (agent != NULL) {
- /* Note: libssh2 implementation implicitly calls
- * libssh2_agent_disconnect if necessary.
- */
- libssh2_agent_free(agent);
- }
-
return ret;
}
@@ -640,7 +661,8 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
- long port = 0;
+ unsigned int port = 0;
+ int new_sock = -1;
if (opts->has_user) {
s->user = g_strdup(opts->user);
@@ -657,71 +679,147 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
s->inet = opts->server;
opts->server = NULL;
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
error_setg(errp, "Use only numeric port value");
ret = -EINVAL;
goto err;
}
/* Open the socket and connect. */
- s->sock = inet_connect_saddr(s->inet, errp);
- if (s->sock < 0) {
+ new_sock = inet_connect_saddr(s->inet, errp);
+ if (new_sock < 0) {
ret = -EIO;
goto err;
}
+ /*
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
+ * but do not fail if it cannot be disabled.
+ */
+ r = socket_set_nodelay(new_sock);
+ if (r < 0) {
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
+ s->inet->host, strerror(errno));
+ }
+
/* Create SSH session. */
- s->session = libssh2_session_init();
+ s->session = ssh_new();
if (!s->session) {
ret = -EINVAL;
- session_error_setg(errp, s, "failed to initialize libssh2 session");
+ session_error_setg(errp, s, "failed to initialize libssh session");
goto err;
}
-#if TRACE_LIBSSH2 != 0
- libssh2_trace(s->session, TRACE_LIBSSH2);
-#endif
+ /*
+ * Make sure we are in blocking mode during the connection and
+ * authentication phases.
+ */
+ ssh_set_blocking(s->session, 1);
- r = libssh2_session_handshake(s->session, s->sock);
- if (r != 0) {
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the user in the libssh session");
+ goto err;
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the host in the libssh session");
+ goto err;
+ }
+
+ if (port > 0) {
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the port in the libssh session");
+ goto err;
+ }
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to disable the compression in the libssh "
+ "session");
+ goto err;
+ }
+
+ /* Read ~/.ssh/config. */
+ r = ssh_options_parse_config(s->session, NULL);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
+ goto err;
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the socket in the libssh session");
+ goto err;
+ }
+ /* libssh took ownership of the socket. */
+ s->sock = new_sock;
+ new_sock = -1;
+
+ /* Connect. */
+ r = ssh_connect(s->session);
+ if (r != SSH_OK) {
ret = -EINVAL;
session_error_setg(errp, s, "failed to establish SSH session");
goto err;
}
/* Check the remote host's key against known_hosts. */
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
+ ret = check_host_key(s, opts->host_key_check, errp);
if (ret < 0) {
goto err;
}
/* Authenticate. */
- ret = authenticate(s, s->user, errp);
+ ret = authenticate(s, errp);
if (ret < 0) {
goto err;
}
/* Start SFTP. */
- s->sftp = libssh2_sftp_init(s->session);
+ s->sftp = sftp_new(s->session);
if (!s->sftp) {
- session_error_setg(errp, s, "failed to initialize sftp handle");
+ session_error_setg(errp, s, "failed to create sftp handle");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ r = sftp_init(s->sftp);
+ if (r < 0) {
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
ret = -EINVAL;
goto err;
}
/* Open the remote file. */
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
- creat_mode);
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
if (!s->sftp_handle) {
- session_error_setg(errp, s, "failed to open remote file '%s'",
- opts->path);
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
+ opts->path);
ret = -EINVAL;
goto err;
}
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
- if (r < 0) {
+ /* Make sure the SFTP file is handled in blocking mode. */
+ sftp_file_set_blocking(s->sftp_handle);
+
+ s->attrs = sftp_fstat(s->sftp_handle);
+ if (!s->attrs) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
@@ -729,21 +827,27 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
return 0;
err:
+ if (s->attrs) {
+ sftp_attributes_free(s->attrs);
+ }
+ s->attrs = NULL;
if (s->sftp_handle) {
- libssh2_sftp_close(s->sftp_handle);
+ sftp_close(s->sftp_handle);
}
s->sftp_handle = NULL;
if (s->sftp) {
- libssh2_sftp_shutdown(s->sftp);
+ sftp_free(s->sftp);
}
s->sftp = NULL;
if (s->session) {
- libssh2_session_disconnect(s->session,
- "from qemu ssh client: "
- "error opening connection");
- libssh2_session_free(s->session);
+ ssh_disconnect(s->session);
+ ssh_free(s->session);
}
s->session = NULL;
+ s->sock = -1;
+ if (new_sock >= 0) {
+ close(new_sock);
+ }
return ret;
}
@@ -758,9 +862,11 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
ssh_state_init(s);
- ssh_flags = LIBSSH2_FXF_READ;
+ ssh_flags = 0;
if (bdrv_flags & BDRV_O_RDWR) {
- ssh_flags |= LIBSSH2_FXF_WRITE;
+ ssh_flags |= O_RDWR;
+ } else {
+ ssh_flags |= O_RDONLY;
}
opts = ssh_parse_options(options, errp);
@@ -775,18 +881,13 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
}
/* Go non-blocking. */
- libssh2_session_set_blocking(s->session, 0);
+ ssh_set_blocking(s->session, 0);
qapi_free_BlockdevOptionsSsh(opts);
return 0;
err:
- if (s->sock >= 0) {
- close(s->sock);
- }
- s->sock = -1;
-
qapi_free_BlockdevOptionsSsh(opts);
return ret;
@@ -797,25 +898,25 @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
{
ssize_t ret;
char c[1] = { '\0' };
- int was_blocking = libssh2_session_get_blocking(s->session);
+ int was_blocking = ssh_is_blocking(s->session);
/* offset must be strictly greater than the current size so we do
* not overwrite anything */
- assert(offset > 0 && offset > s->attrs.filesize);
+ assert(offset > 0 && offset > s->attrs->size);
- libssh2_session_set_blocking(s->session, 1);
+ ssh_set_blocking(s->session, 1);
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
+ sftp_seek64(s->sftp_handle, offset - 1);
+ ret = sftp_write(s->sftp_handle, c, 1);
- libssh2_session_set_blocking(s->session, was_blocking);
+ ssh_set_blocking(s->session, was_blocking);
if (ret < 0) {
sftp_error_setg(errp, s, "Failed to grow file");
return -EIO;
}
- s->attrs.filesize = offset;
+ s->attrs->size = offset;
return 0;
}
@@ -843,8 +944,7 @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
ssh_state_init(&s);
ret = connect_to_ssh(&s, opts->location,
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
+ O_RDWR | O_CREAT | O_TRUNC,
0644, errp);
if (ret < 0) {
goto fail;
@@ -913,10 +1013,8 @@ static int ssh_has_zero_init(BlockDriverState *bs)
/* Assume false, unless we can positively prove it's true. */
int has_zero_init = 0;
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
- has_zero_init = 1;
- }
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
+ has_zero_init = 1;
}
return has_zero_init;
@@ -953,12 +1051,12 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
.co = qemu_coroutine_self()
};
- r = libssh2_session_block_directions(s->session);
+ r = ssh_get_poll_flags(s->session);
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
+ if (r & SSH_READ_PENDING) {
rd_handler = restart_coroutine;
}
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
+ if (r & SSH_WRITE_PENDING) {
wr_handler = restart_coroutine;
}
@@ -970,33 +1068,6 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
trace_ssh_co_yield_back(s->sock);
}
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
- * in the remote file. Notice that it just updates a field in the
- * sftp_handle structure, so there is no network traffic and it cannot
- * fail.
- *
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
- * performance since it causes the handle to throw away all in-flight
- * reads and buffered readahead data. Therefore this function tries
- * to be intelligent about when to call the underlying libssh2 function.
- */
-#define SSH_SEEK_WRITE 0
-#define SSH_SEEK_READ 1
-#define SSH_SEEK_FORCE 2
-
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
-{
- bool op_read = (flags & SSH_SEEK_READ) != 0;
- bool force = (flags & SSH_SEEK_FORCE) != 0;
-
- if (force || op_read != s->offset_op_read || offset != s->offset) {
- trace_ssh_seek(offset);
- libssh2_sftp_seek64(s->sftp_handle, offset);
- s->offset = offset;
- s->offset_op_read = op_read;
- }
-}
-
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
@@ -1008,7 +1079,8 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
trace_ssh_read(offset, size);
- ssh_seek(s, offset, SSH_SEEK_READ);
+ trace_ssh_seek(offset);
+ sftp_seek64(s->sftp_handle, offset);
/* This keeps track of the current iovec element ('i'), where we
* will write to next ('buf'), and the end of the current iovec
@@ -1018,35 +1090,35 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
buf = i->iov_base;
end_of_vec = i->iov_base + i->iov_len;
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
- * although it will also do readahead behind our backs. Therefore
- * we may have to do repeated reads here until we have read 'size'
- * bytes.
- */
for (got = 0; got < size; ) {
+ size_t request_read_size;
again:
- trace_ssh_read_buf(buf, end_of_vec - buf);
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
- trace_ssh_read_return(r);
+ /*
+ * The size of SFTP packets is limited to 32K bytes, so limit
+ * the amount of data requested to 16K, as libssh currently
+ * does not handle multiple requests on its own.
+ */
+ request_read_size = MIN(end_of_vec - buf, 16384);
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
- if (r < 0) {
- sftp_error_trace(s, "read");
- s->offset = -1;
- return -EIO;
- }
- if (r == 0) {
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
/* EOF: Short read so pad the buffer with zeroes and return it. */
qemu_iovec_memset(qiov, got, 0, size - got);
return 0;
}
+ if (r <= 0) {
+ sftp_error_trace(s, "read");
+ return -EIO;
+ }
got += r;
buf += r;
- s->offset += r;
if (buf >= end_of_vec && got < size) {
i++;
buf = i->iov_base;
@@ -1083,7 +1155,8 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
trace_ssh_write(offset, size);
- ssh_seek(s, offset, SSH_SEEK_WRITE);
+ trace_ssh_seek(offset);
+ sftp_seek64(s->sftp_handle, offset);
/* This keeps track of the current iovec element ('i'), where we
* will read from next ('buf'), and the end of the current iovec
@@ -1094,46 +1167,37 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
end_of_vec = i->iov_base + i->iov_len;
for (written = 0; written < size; ) {
+ size_t request_write_size;
again:
- trace_ssh_write_buf(buf, end_of_vec - buf);
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
- trace_ssh_write_return(r);
+ /*
+ * Avoid too large data packets, as libssh currently does not
+ * handle multiple requests on its own.
+ */
+ request_write_size = MIN(end_of_vec - buf, 131072);
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
if (r < 0) {
sftp_error_trace(s, "write");
- s->offset = -1;
return -EIO;
}
- /* The libssh2 API is very unclear about this. A comment in
- * the code says "nothing was acked, and no EAGAIN was
- * received!" which apparently means that no data got sent
- * out, and the underlying channel didn't return any EAGAIN
- * indication. I think this is a bug in either libssh2 or
- * OpenSSH (server-side). In any case, forcing a seek (to
- * discard libssh2 internal buffers), and then trying again
- * works for me.
- */
- if (r == 0) {
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
- co_yield(s, bs);
- goto again;
- }
written += r;
buf += r;
- s->offset += r;
if (buf >= end_of_vec && written < size) {
i++;
buf = i->iov_base;
end_of_vec = i->iov_base + i->iov_len;
}
- if (offset + written > s->attrs.filesize)
- s->attrs.filesize = offset + written;
+ if (offset + written > s->attrs->size) {
+ s->attrs->size = offset + written;
+ }
}
return 0;
@@ -1168,24 +1232,24 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
}
}
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
+#ifdef HAVE_LIBSSH_0_8
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
trace_ssh_flush();
+
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
+ return 0;
+ }
again:
- r = libssh2_sftp_fsync(s->sftp_handle);
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ r = sftp_fsync(s->sftp_handle);
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
- return 0;
- }
if (r < 0) {
sftp_error_trace(s, "fsync");
return -EIO;
@@ -1206,25 +1270,25 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
return ret;
}
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
+#else /* !HAVE_LIBSSH_0_8 */
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
{
BDRVSSHState *s = bs->opaque;
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
return 0;
}
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
+#endif /* !HAVE_LIBSSH_0_8 */
static int64_t ssh_getlength(BlockDriverState *bs)
{
BDRVSSHState *s = bs->opaque;
int64_t length;
- /* Note we cannot make a libssh2 call here. */
- length = (int64_t) s->attrs.filesize;
+ /* Note we cannot make a libssh call here. */
+ length = (int64_t) s->attrs->size;
trace_ssh_getlength(length);
return length;
@@ -1241,12 +1305,12 @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
return -ENOTSUP;
}
- if (offset < s->attrs.filesize) {
+ if (offset < s->attrs->size) {
error_setg(errp, "ssh driver does not support shrinking files");
return -ENOTSUP;
}
- if (offset == s->attrs.filesize) {
+ if (offset == s->attrs->size) {
return 0;
}
@@ -1341,12 +1405,16 @@ static void bdrv_ssh_init(void)
{
int r;
- r = libssh2_init(0);
+ r = ssh_init();
if (r != 0) {
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
exit(EXIT_FAILURE);
}
+#if TRACE_LIBSSH != 0
+ ssh_set_log_level(TRACE_LIBSSH);
+#endif
+
bdrv_register(&bdrv_ssh);
}
diff --git a/block/trace-events b/block/trace-events
index 9ccea755da..d724df0117 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -171,19 +171,21 @@ nbd_client_connect_success(const char *export_name) "export '%s'"
# ssh.c
ssh_restart_coroutine(void *co) "co=%p"
ssh_flush(void) "fsync"
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
+ssh_check_host_key_knownhosts(void) "host key OK"
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
ssh_co_yield_back(int sock) "s->sock=%d - back"
ssh_getlength(int64_t length) "length=%" PRIi64
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
+ssh_auth_methods(int methods) "auth methods=0x%x"
+ssh_server_status(int status) "server status=%d"
# curl.c
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
@@ -216,4 +218,4 @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
# ssh.c
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
diff --git a/block/vmdk.c b/block/vmdk.c
index 51067c774f..bd36ece125 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -91,6 +91,44 @@ typedef struct {
uint16_t compressAlgorithm;
} QEMU_PACKED VMDK4Header;
+typedef struct VMDKSESparseConstHeader { /* one sector, read from offset 0 */
+ uint64_t magic; /* must equal SESPARSE_CONST_HEADER_MAGIC */
+ uint64_t version; /* only 0x0000000200000001 is accepted */
+ uint64_t capacity; /* passed to vmdk_add_extent() as the extent size */
+ uint64_t grain_size; /* only 8 is accepted */
+ uint64_t grain_table_size; /* only 64 is accepted */
+ uint64_t flags; /* must be 0 */
+ uint64_t reserved1; /* reserved1..reserved4 must all be 0 */
+ uint64_t reserved2;
+ uint64_t reserved3;
+ uint64_t reserved4;
+ uint64_t volatile_header_offset; /* multiplied by SECTOR_SIZE when read */
+ uint64_t volatile_header_size;
+ uint64_t journal_header_offset;
+ uint64_t journal_header_size;
+ uint64_t journal_offset;
+ uint64_t journal_size;
+ uint64_t grain_dir_offset; /* multiplied by SECTOR_SIZE to locate L1 */
+ uint64_t grain_dir_size; /* * SECTOR_SIZE / sizeof(uint64_t) = L1 entries */
+ uint64_t grain_tables_offset; /* stored as sesparse_l2_tables_offset */
+ uint64_t grain_tables_size;
+ uint64_t free_bitmap_offset;
+ uint64_t free_bitmap_size;
+ uint64_t backmap_offset;
+ uint64_t backmap_size;
+ uint64_t grains_offset; /* stored as sesparse_clusters_offset */
+ uint64_t grains_size;
+ uint8_t pad[304]; /* must be all zero */
+} QEMU_PACKED VMDKSESparseConstHeader;
+
+typedef struct VMDKSESparseVolatileHeader { /* one sector on disk */
+ uint64_t magic; /* must equal SESPARSE_VOLATILE_HEADER_MAGIC */
+ uint64_t free_gt_number;
+ uint64_t next_txn_seq_number;
+ uint64_t replay_journal; /* non-zero: image is dirty, open is refused */
+ uint8_t pad[480]; /* must be all zero */
+} QEMU_PACKED VMDKSESparseVolatileHeader;
+
#define L2_CACHE_SIZE 16
typedef struct VmdkExtent {
@@ -99,19 +137,23 @@ typedef struct VmdkExtent {
bool compressed;
bool has_marker;
bool has_zero_grain;
+ bool sesparse;
+ uint64_t sesparse_l2_tables_offset;
+ uint64_t sesparse_clusters_offset;
+ int32_t entry_size;
int version;
int64_t sectors;
int64_t end_sector;
int64_t flat_start_offset;
int64_t l1_table_offset;
int64_t l1_backup_table_offset;
- uint32_t *l1_table;
+ void *l1_table;
uint32_t *l1_backup_table;
unsigned int l1_size;
uint32_t l1_entry_sectors;
unsigned int l2_size;
- uint32_t *l2_cache;
+ void *l2_cache;
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
uint32_t l2_cache_counts[L2_CACHE_SIZE];
@@ -425,11 +467,22 @@ static int vmdk_add_extent(BlockDriverState *bs,
error_setg(errp, "Invalid granularity, image may be corrupt");
return -EFBIG;
}
- if (l1_size > 512 * 1024 * 1024) {
- /* Although with big capacity and small l1_entry_sectors, we can get a
+ if (l1_size > 32 * 1024 * 1024) {
+ /*
+ * Although with big capacity and small l1_entry_sectors, we can get a
* big l1_size, we don't want unbounded value to allocate the table.
- * Limit it to 512M, which is 16PB for default cluster and L2 table
- * size */
+ * Limit it to 32M, which is enough to store:
+ * 8TB - for both VMDK3 & VMDK4 with
+ * minimal cluster size: 512B
+ * minimal L2 table size: 512 entries
+ * 8 TB is still more than the maximal value supported for
+ * VMDK3 & VMDK4 which is 2TB.
+ * 64TB - for "ESXi seSparse Extent"
+ * minimal cluster size: 512B (default is 4KB)
+ * L2 table size: 4096 entries (const).
+ * 64TB is more than the maximal value supported for
+ * seSparse VMDKs (which is slightly less than 64TB)
+ */
error_setg(errp, "L1 size too big");
return -EFBIG;
}
@@ -454,6 +507,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
+ extent->entry_size = sizeof(uint32_t);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -475,7 +529,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
int i;
/* read the L1 table */
- l1_size = extent->l1_size * sizeof(uint32_t);
+ l1_size = extent->l1_size * extent->entry_size;
extent->l1_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_table == NULL) {
return -ENOMEM;
@@ -493,10 +547,16 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
goto fail_l1;
}
for (i = 0; i < extent->l1_size; i++) {
- le32_to_cpus(&extent->l1_table[i]);
+ if (extent->entry_size == sizeof(uint64_t)) {
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
+ } else {
+ assert(extent->entry_size == sizeof(uint32_t));
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
+ }
}
if (extent->l1_backup_table_offset) {
+ assert(!extent->sesparse);
extent->l1_backup_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_backup_table == NULL) {
ret = -ENOMEM;
@@ -519,7 +579,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
extent->l2_cache =
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
return 0;
fail_l1b:
g_free(extent->l1_backup_table);
@@ -565,6 +625,205 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
return ret;
}
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
+
+/*
+ * Convert the seSparse constant header to host byte order in place and
+ * validate it.
+ *
+ * Strict checks - format not officially documented: only the exact magic,
+ * version, grain size (8) and grain table size (64) tested below are
+ * accepted, and flags, reserved fields and padding must all be zero.
+ *
+ * Returns 0 on success.  On failure sets @errp and returns -EINVAL for a bad
+ * magic, or -ENOTSUP for any other unsupported value.
+ */
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
+                                        Error **errp)
+{
+    header->magic = le64_to_cpu(header->magic);
+    header->version = le64_to_cpu(header->version);
+    /*
+     * The caller uses capacity as the extent size, so it has to be in host
+     * byte order like every other field.
+     */
+    header->capacity = le64_to_cpu(header->capacity);
+    header->grain_size = le64_to_cpu(header->grain_size);
+    header->grain_table_size = le64_to_cpu(header->grain_table_size);
+    header->flags = le64_to_cpu(header->flags);
+    header->reserved1 = le64_to_cpu(header->reserved1);
+    header->reserved2 = le64_to_cpu(header->reserved2);
+    header->reserved3 = le64_to_cpu(header->reserved3);
+    header->reserved4 = le64_to_cpu(header->reserved4);
+
+    header->volatile_header_offset =
+        le64_to_cpu(header->volatile_header_offset);
+    header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
+
+    header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
+    header->journal_header_size = le64_to_cpu(header->journal_header_size);
+
+    header->journal_offset = le64_to_cpu(header->journal_offset);
+    header->journal_size = le64_to_cpu(header->journal_size);
+
+    header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
+    header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
+
+    header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
+    header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
+
+    header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
+    header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
+
+    header->backmap_offset = le64_to_cpu(header->backmap_offset);
+    header->backmap_size = le64_to_cpu(header->backmap_size);
+
+    header->grains_offset = le64_to_cpu(header->grains_offset);
+    header->grains_size = le64_to_cpu(header->grains_size);
+
+    if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
+        error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
+                   header->magic);
+        return -EINVAL;
+    }
+
+    if (header->version != 0x0000000200000001) {
+        error_setg(errp, "Unsupported version: 0x%016" PRIx64,
+                   header->version);
+        return -ENOTSUP;
+    }
+
+    if (header->grain_size != 8) {
+        error_setg(errp, "Unsupported grain size: %" PRIu64,
+                   header->grain_size);
+        return -ENOTSUP;
+    }
+
+    if (header->grain_table_size != 64) {
+        error_setg(errp, "Unsupported grain table size: %" PRIu64,
+                   header->grain_table_size);
+        return -ENOTSUP;
+    }
+
+    if (header->flags != 0) {
+        error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
+                   header->flags);
+        return -ENOTSUP;
+    }
+
+    if (header->reserved1 != 0 || header->reserved2 != 0 ||
+        header->reserved3 != 0 || header->reserved4 != 0) {
+        error_setg(errp, "Unsupported reserved bits:"
+                   " 0x%016" PRIx64 " 0x%016" PRIx64
+                   " 0x%016" PRIx64 " 0x%016" PRIx64,
+                   header->reserved1, header->reserved2,
+                   header->reserved3, header->reserved4);
+        return -ENOTSUP;
+    }
+
+    /* check that padding is 0 */
+    if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+        error_setg(errp, "Unsupported non-zero const header padding");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
+/*
+ * Convert the seSparse volatile header to host byte order in place and
+ * validate it.
+ *
+ * Returns 0 on success.  On failure sets @errp and returns -EINVAL for a bad
+ * magic, or -ENOTSUP when the journal needs replaying or the padding is not
+ * zero.
+ */
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
+                                           Error **errp)
+{
+    const uint64_t magic = le64_to_cpu(header->magic);
+    const uint64_t needs_replay = le64_to_cpu(header->replay_journal);
+
+    /* All fields are swapped before any check, so the caller sees host order */
+    header->magic = magic;
+    header->free_gt_number = le64_to_cpu(header->free_gt_number);
+    header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
+    header->replay_journal = needs_replay;
+
+    if (magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
+        error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64, magic);
+        return -EINVAL;
+    }
+
+    if (needs_replay != 0) {
+        error_setg(errp, "Image is dirty, Replaying journal not supported");
+        return -ENOTSUP;
+    }
+
+    /* the padding has to be all zero */
+    if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+        error_setg(errp, "Unsupported non-zero volatile header padding");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
+/*
+ * Open a seSparse extent stored in @file: apply auto-read-only to @bs, read
+ * and validate the constant and volatile headers, register the extent and
+ * load its tables.
+ *
+ * Returns 0 on success, otherwise the error value of the step that failed.
+ * @flags is not used.
+ */
+static int vmdk_open_se_sparse(BlockDriverState *bs,
+                               BdrvChild *file,
+                               int flags, Error **errp)
+{
+    VMDKSESparseConstHeader const_header;
+    VMDKSESparseVolatileHeader volatile_header;
+    VmdkExtent *new_extent;
+    uint64_t l1_entries;
+    uint64_t l2_entries;
+    int rc;
+
+    rc = bdrv_apply_auto_read_only(bs,
+            "No write support for seSparse images available", errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    assert(sizeof(const_header) == SECTOR_SIZE);
+
+    /* The constant header sits at the very start of the extent file */
+    rc = bdrv_pread(file, 0, &const_header, sizeof(const_header));
+    if (rc < 0) {
+        bdrv_refresh_filename(file->bs);
+        error_setg_errno(errp, -rc,
+                         "Could not read const header from file '%s'",
+                         file->bs->filename);
+        return rc;
+    }
+
+    rc = check_se_sparse_const_header(&const_header, errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    assert(sizeof(volatile_header) == SECTOR_SIZE);
+
+    /* The constant header gives the volatile header's position in sectors */
+    rc = bdrv_pread(file,
+                    const_header.volatile_header_offset * SECTOR_SIZE,
+                    &volatile_header, sizeof(volatile_header));
+    if (rc < 0) {
+        bdrv_refresh_filename(file->bs);
+        error_setg_errno(errp, -rc,
+                         "Could not read volatile header from file '%s'",
+                         file->bs->filename);
+        return rc;
+    }
+
+    rc = check_se_sparse_volatile_header(&volatile_header, errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Table sizes are given in sectors; every table entry is 64 bits wide */
+    l1_entries = const_header.grain_dir_size * SECTOR_SIZE / sizeof(uint64_t);
+    l2_entries = const_header.grain_table_size *
+                 SECTOR_SIZE / sizeof(uint64_t);
+
+    rc = vmdk_add_extent(bs, file, false,
+                         const_header.capacity,
+                         const_header.grain_dir_offset * SECTOR_SIZE,
+                         0,
+                         l1_entries,
+                         l2_entries,
+                         const_header.grain_size,
+                         &new_extent,
+                         errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    new_extent->sesparse = true;
+    new_extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
+    new_extent->sesparse_clusters_offset = const_header.grains_offset;
+    new_extent->entry_size = sizeof(uint64_t);
+
+    rc = vmdk_init_tables(bs, new_extent, errp);
+    if (rc != 0) {
+        /* drop the extent that vmdk_add_extent just registered */
+        vmdk_free_last_extent(bs);
+    }
+
+    return rc;
+}
+
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
QDict *options, Error **errp);
@@ -842,6 +1101,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
* RW [size in sectors] SPARSE "file-name.vmdk"
* RW [size in sectors] VMFS "file-name.vmdk"
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
*/
flat_offset = -1;
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
@@ -864,7 +1124,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
if (sectors <= 0 ||
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
+ strcmp(type, "SESPARSE")) ||
(strcmp(access, "RW"))) {
continue;
}
@@ -917,6 +1178,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
return ret;
}
extent = &s->extents[s->num_extents - 1];
+ } else if (!strcmp(type, "SESPARSE")) {
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
+ if (ret) {
+ bdrv_unref_child(bs, extent_file);
+ return ret;
+ }
+ extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
bdrv_unref_child(bs, extent_file);
@@ -951,6 +1219,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
if (strcmp(ct, "monolithicFlat") &&
strcmp(ct, "vmfs") &&
strcmp(ct, "vmfsSparse") &&
+ strcmp(ct, "seSparse") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
strcmp(ct, "twoGbMaxExtentFlat")) {
error_setg(errp, "Unsupported image type '%s'", ct);
@@ -1201,10 +1470,12 @@ static int get_cluster_offset(BlockDriverState *bs,
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
- uint32_t min_count, *l2_table;
+ uint32_t min_count;
+ void *l2_table;
bool zeroed = false;
int64_t ret;
int64_t cluster_sector;
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
if (m_data) {
m_data->valid = 0;
@@ -1219,7 +1490,36 @@ static int get_cluster_offset(BlockDriverState *bs,
if (l1_index >= extent->l1_size) {
return VMDK_ERROR;
}
- l2_offset = extent->l1_table[l1_index];
+ if (extent->sesparse) {
+ uint64_t l2_offset_u64;
+
+ assert(extent->entry_size == sizeof(uint64_t));
+
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
+ if (l2_offset_u64 == 0) {
+ l2_offset = 0;
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
+ /*
+ * Top most nibble is 0x1 if grain table is allocated.
+ * strict check - top most 4 bytes must be 0x10000000 since max
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
+ * grain directories which is smaller than uint32,
+ * where 16MB is the only supported default grain table coverage.
+ */
+ return VMDK_ERROR;
+ } else {
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
+ if (l2_offset_u64 > 0x00000000ffffffff) {
+ return VMDK_ERROR;
+ }
+ l2_offset = (unsigned int)(l2_offset_u64);
+ }
+ } else {
+ assert(extent->entry_size == sizeof(uint32_t));
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
+ }
if (!l2_offset) {
return VMDK_UNALLOC;
}
@@ -1231,7 +1531,7 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[j] >>= 1;
}
}
- l2_table = extent->l2_cache + (i * extent->l2_size);
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
goto found;
}
}
@@ -1244,13 +1544,13 @@ static int get_cluster_offset(BlockDriverState *bs,
min_index = i;
}
}
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
if (bdrv_pread(extent->file,
(int64_t)l2_offset * 512,
l2_table,
- extent->l2_size * sizeof(uint32_t)
- ) != extent->l2_size * sizeof(uint32_t)) {
+ l2_size_bytes
+ ) != l2_size_bytes) {
return VMDK_ERROR;
}
@@ -1258,16 +1558,45 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
- zeroed = true;
+ if (extent->sesparse) {
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
+ switch (cluster_sector & 0xf000000000000000) {
+ case 0x0000000000000000:
+ /* unallocated grain */
+ if (cluster_sector != 0) {
+ return VMDK_ERROR;
+ }
+ break;
+ case 0x1000000000000000:
+ /* scsi-unmapped grain - fallthrough */
+ case 0x2000000000000000:
+ /* zero grain */
+ zeroed = true;
+ break;
+ case 0x3000000000000000:
+ /* allocated grain */
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
+ cluster_sector = extent->sesparse_clusters_offset +
+ cluster_sector * extent->cluster_sectors;
+ break;
+ default:
+ return VMDK_ERROR;
+ }
+ } else {
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
+
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
+ zeroed = true;
+ }
}
if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
+ assert(!extent->sesparse);
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
return VMDK_ERROR;
@@ -1291,7 +1620,7 @@ static int get_cluster_offset(BlockDriverState *bs,
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
- m_data->l2_cache_entry = &l2_table[l2_index];
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
}
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
@@ -1617,6 +1946,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
if (!extent) {
return -EIO;
}
+ if (extent->sesparse) {
+ return -ENOTSUP;
+ }
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
- offset_in_cluster);