/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "qemu/iov.h"
#include "qemu/sockets.h"
#include "block/coroutine.h"
#include "migration/migration.h"
#include "migration/qemu-file.h"
#include "trace.h"
/* Capacity, in bytes, of the staging buffer embedded in each QEMUFile. */
#define IO_BUF_SIZE 32768
/* Number of iovec slots embedded in each QEMUFile; never above IOV_MAX. */
#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
/*
 * State of one buffered stream.  The same structure serves both the
 * read path and the write path implemented in this file.
 */
struct QEMUFile {
const QEMUFileOps *ops; /* backend callbacks */
void *opaque; /* handed back to the backend callbacks */
int64_t bytes_xfer; /* bytes queued for output since the last reset */
int64_t xfer_limit; /* rate-limit threshold; only checked when > 0 */
int64_t pos; /* start of buffer when writing, end of buffer
when reading */
int buf_index; /* next position in buf[] to read from / write to */
int buf_size; /* 0 when writing */
uint8_t buf[IO_BUF_SIZE]; /* staging buffer */
struct iovec iov[MAX_IOV_SIZE]; /* segments queued for writev_buffer */
unsigned int iovcnt; /* number of iov[] entries currently in use */
int last_error; /* first error recorded on the stream, 0 if none */
};
/*
 * Validate an fopen-style mode string.
 *
 * Only the exact strings "rb" and "wb" are accepted.  Any other value,
 * including NULL, is rejected and a diagnostic is printed on stderr.
 *
 * Returns true when @mode is NOT acceptable, false when it is.
 */
bool qemu_file_mode_is_not_valid(const char *mode)
{
    bool acceptable = mode != NULL &&
                      (mode[0] == 'r' || mode[0] == 'w') &&
                      mode[1] == 'b' &&
                      mode[2] == '\0';

    if (acceptable) {
        return false;
    }

    fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
    return true;
}
/*
 * Allocate a zero-initialised QEMUFile bound to the backend described by
 * @ops.  @opaque is stored and later handed to the backend callbacks.
 *
 * Returns the new QEMUFile.
 */
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
{
    QEMUFile *file = g_malloc0(sizeof(*file));

    file->ops = ops;
    file->opaque = opaque;

    return file;
}
/*
 * Report the error state of stream @f.
 *
 * Returns the value stored by qemu_file_set_error() (a negative error
 * code by convention), or 0 when no error has been recorded.
 */
int qemu_file_get_error(QEMUFile *f)
{
    int recorded = f->last_error;

    return recorded;
}
/*
 * Record @ret as the error of stream @f.
 *
 * Only the first error sticks: once a non-zero value has been stored,
 * later calls leave it untouched.
 */
void qemu_file_set_error(QEMUFile *f, int ret)
{
    if (f->last_error != 0) {
        return;
    }
    f->last_error = ret;
}
/*
 * Tell whether @f can be written to, i.e. whether its backend provides
 * either a writev_buffer or a put_buffer callback.
 */
bool qemu_file_is_writable(QEMUFile *f)
{
    const QEMUFileOps *ops = f->ops;

    if (ops->writev_buffer) {
        return true;
    }
    return ops->put_buffer != NULL;
}
/**
 * Push pending output of @f to the backend.
 *
 * Nothing happens for a file without write callbacks.  When the backend
 * offers writev_buffer, the queued iovec entries are submitted through
 * it; otherwise the staging buffer is handed to put_buffer.
 *
 * A non-negative backend result advances the stream position by that
 * amount; a negative one is recorded as the stream error.  In both cases
 * the staging buffer index and the iovec count are reset to zero.
 */
void qemu_fflush(QEMUFile *f)
{
    const QEMUFileOps *ops = f->ops;
    ssize_t written = 0;

    if (!qemu_file_is_writable(f)) {
        return;
    }

    if (!ops->writev_buffer) {
        if (f->buf_index > 0) {
            written = ops->put_buffer(f->opaque, f->buf, f->pos,
                                      f->buf_index);
        }
    } else if (f->iovcnt > 0) {
        written = ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
    }

    /* Whatever the outcome, start again with an empty buffer. */
    f->iovcnt = 0;
    f->buf_index = 0;

    if (written < 0) {
        qemu_file_set_error(f, written);
        return;
    }
    f->pos += written;
}
/*
 * Invoke the backend's before_ram_iterate callback, if it has one,
 * passing @flags through.  A negative result is recorded as the stream
 * error of @f.
 */
void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
{
    int rc;

    if (!f->ops->before_ram_iterate) {
        return;
    }

    rc = f->ops->before_ram_iterate(f, f->opaque, flags);
    if (rc < 0) {
        qemu_file_set_error(f, rc);
    }
}
/*
 * Invoke the backend's after_ram_iterate callback, if it has one,
 * passing @flags through.  A negative result is recorded as the stream
 * error of @f.
 */
void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
{
    int rc;

    if (!f->ops->after_ram_iterate) {
        return;
    }

    rc = f->ops->after_ram_iterate(f, f->opaque, flags);
    if (rc < 0) {
        qemu_file_set_error(f, rc);
    }
}
/*
 * Invoke the backend's hook_ram_load callback with @flags.
 *
 * Unlike the iterate hooks, a missing callback is an error here: the
 * stream error is set to -EINVAL.  A negative callback result is
 * recorded as the stream error as well.
 */
void ram_control_load_hook(QEMUFile *f, uint64_t flags)
{
    int rc;

    if (!f->ops->hook_ram_load) {
        qemu_file_set_error(f, -EINVAL);
        return;
    }

    rc = f->ops->hook_ram_load(f, f->opaque, flags);
    if (rc < 0) {
        qemu_file_set_error(f, rc);
    }
}
/*
 * Offer a RAM page to the backend's save_page callback.
 *
 * Returns RAM_SAVE_CONTROL_NOT_SUPP when the backend has no save_page
 * callback; otherwise returns the callback's result (converted to
 * size_t).  Unless that result is RAM_SAVE_CONTROL_DELAYED, the stream
 * position is advanced by *@bytes_sent when it is positive, and
 * otherwise a negative result is recorded as the stream error.
 */
size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                             ram_addr_t offset, size_t size, int *bytes_sent)
{
    int rc;

    if (!f->ops->save_page) {
        return RAM_SAVE_CONTROL_NOT_SUPP;
    }

    rc = f->ops->save_page(f, f->opaque, block_offset,
                           offset, size, bytes_sent);
    if (rc == RAM_SAVE_CONTROL_DELAYED) {
        return rc;
    }

    if (bytes_sent && *bytes_sent > 0) {
        qemu_update_position(f, *bytes_sent);
    } else if (rc < 0) {
        qemu_file_set_error(f, rc);
    }
    return rc;
}
/*
 * Try to top up the read buffer of @f from the backend.
 *
 * Unconsumed bytes are first moved to the front of the buffer and
 * buf_index is reset to 0; the backend is then asked to fill the rest.
 *
 * Returns the backend's get_buffer result: the number of bytes added,
 * 0, or a negative error.  A result of 0 is recorded as -EIO and any
 * negative result other than -EAGAIN is recorded as the stream error.
 *
 * A short read is possible even without error or EOF, because the
 * underlying descriptor may deliver fewer bytes than requested, even
 * when it is blocking.
 */
static ssize_t qemu_fill_buffer(QEMUFile *f)
{
    int leftover;
    int got;

    assert(!qemu_file_is_writable(f));

    /* Compact: keep what has not been consumed yet at the buffer start. */
    leftover = f->buf_size - f->buf_index;
    if (leftover > 0) {
        memmove(f->buf, f->buf + f->buf_index, leftover);
    }
    f->buf_size = leftover;
    f->buf_index = 0;

    got = f->ops->get_buffer(f->opaque, f->buf + leftover, f->pos,
                             IO_BUF_SIZE - leftover);
    if (got > 0) {
        f->pos += got;
        f->buf_size += got;
        return got;
    }

    if (got == 0) {
        qemu_file_set_error(f, -EIO);
    } else if (got != -EAGAIN) {
        qemu_file_set_error(f, got);
    }
    return got;
}
/*
 * Return the value of the backend's get_fd callback for @f, or -1 when
 * the backend does not provide that callback.
 */
int qemu_get_fd(QEMUFile *f)
{
    return f->ops->get_fd ? f->ops->get_fd(f->opaque) : -1;
}
/*
 * Advance the stream position of @f by @size bytes without touching the
 * buffers.
 */
void qemu_update_position(QEMUFile *f, size_t size)
{
    f->pos = f->pos + size;
}
/**
 * Flush, close and free the file @f.
 *
 * Returns a negative error value if an error was recorded by an earlier
 * operation or occurred while closing.  Returns 0 or a positive number
 * on success; what a successful value means depends on the backend in
 * use.
 */
int qemu_fclose(QEMUFile *f)
{
    int result;

    qemu_fflush(f);
    result = qemu_file_get_error(f);

    if (f->ops->close) {
        int close_rc = f->ops->close(f->opaque);

        result = (result < 0) ? result : close_rc;
    }

    /*
     * An error recorded on the stream takes precedence over whatever
     * the backend's close() returned.
     */
    if (f->last_error != 0) {
        result = f->last_error;
    }

    g_free(f);
    trace_qemu_file_fclose();
    return result;
}
/*
 * Queue @size bytes at @buf for the next writev_buffer call on @f.
 *
 * If @buf begins exactly where the last queued segment ends, that
 * segment is simply extended; otherwise a new segment is appended.
 * The file is flushed once the segment count reaches MAX_IOV_SIZE.
 */
static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
{
    struct iovec *last = f->iovcnt > 0 ? &f->iov[f->iovcnt - 1] : NULL;

    if (last && buf == last->iov_base + last->iov_len) {
        /* Adjacent to the previous segment: coalesce. */
        last->iov_len += size;
    } else {
        struct iovec *slot = &f->iov[f->iovcnt];

        slot->iov_base = (uint8_t *)buf;
        slot->iov_len = size;
        f->iovcnt++;
    }

    if (f->iovcnt >= MAX_IOV_SIZE) {
        qemu_fflush(f);
    }
}
/*
 * Queue @size bytes at @buf for output without copying them into the
 * staging buffer.
 *
 * This only works with a writev_buffer backend; without one the call
 * falls back to qemu_put_buffer().  With one, nothing is queued if an
 * error is already recorded on @f.  The pointer itself is stored in the
 * iovec list, so the data is read later, when the list is flushed.
 */
void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
{
    if (f->ops->writev_buffer) {
        if (!f->last_error) {
            f->bytes_xfer += size;
            add_to_iovec(f, buf, size);
        }
        return;
    }

    qemu_put_buffer(f, buf, size);
}
/*
 * Copy @size bytes from @buf into the output stream @f.
 *
 * The data is copied into the staging buffer piece by piece; each piece
 * is also queued as an iovec segment when the backend uses
 * writev_buffer.  The file is flushed whenever the staging buffer
 * becomes full.  Nothing is written if an error is already recorded,
 * and copying stops as soon as one is recorded.
 */
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
{
    if (f->last_error) {
        return;
    }

    while (size > 0) {
        int room = IO_BUF_SIZE - f->buf_index;
        int chunk = room > size ? size : room;
        uint8_t *dst = f->buf + f->buf_index;

        memcpy(dst, buf, chunk);
        f->bytes_xfer += chunk;
        if (f->ops->writev_buffer) {
            add_to_iovec(f, dst, chunk);
        }

        f->buf_index += chunk;
        if (f->buf_index == IO_BUF_SIZE) {
            qemu_fflush(f);
        }
        if (qemu_file_get_error(f)) {
            break;
        }

        buf += chunk;
        size -= chunk;
    }
}
/*
 * Append the low 8 bits of @v to the output stream @f.
 *
 * The byte goes into the staging buffer and, for writev_buffer
 * backends, is queued as an iovec segment too.  The file is flushed
 * when the staging buffer becomes full.  Nothing happens if an error is
 * already recorded on @f.
 */
void qemu_put_byte(QEMUFile *f, int v)
{
    uint8_t *slot;

    if (f->last_error) {
        return;
    }

    slot = &f->buf[f->buf_index];
    *slot = v;
    f->bytes_xfer += 1;
    if (f->ops->writev_buffer) {
        add_to_iovec(f, slot, 1);
    }

    f->buf_index += 1;
    if (f->buf_index == IO_BUF_SIZE) {
        qemu_fflush(f);
    }
}
/*
 * Consume @size bytes of already-buffered input of @f.
 *
 * The request is silently ignored when it would move the read index
 * past the end of the buffered data.
 */
void qemu_file_skip(QEMUFile *f, int size)
{
    int target = f->buf_index + size;

    if (target > f->buf_size) {
        return;
    }
    f->buf_index = target;
}
/*
 * Read 'size' bytes from file (at 'offset') into buf without moving the
 * read pointer.
 *
 * @f:      a readable QEMUFile
 * @buf:    destination, at least @size bytes
 * @size:   number of bytes wanted; offset + size must fit in IO_BUF_SIZE
 * @offset: distance from the current read position, < IO_BUF_SIZE
 *
 * Returns @size unless the buffer could not be filled far enough, in
 * which case it returns as many bytes as are available (possibly 0).
 * This assumes blocking fd's, which all current QEMUFile are.
 */
int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
{
    int pending;
    int index;

    assert(!qemu_file_is_writable(f));
    assert(offset < IO_BUF_SIZE);
    assert(size <= IO_BUF_SIZE - offset);

    /* The 1st byte to read from */
    index = f->buf_index + offset;
    /* The number of available bytes starting at index */
    pending = f->buf_size - index;

    /*
     * qemu_fill_buffer might return just a few bytes, even when there isn't
     * an error, so loop collecting them until we get enough.
     */
    while (pending < size) {
        int received = qemu_fill_buffer(f);

        /*
         * qemu_fill_buffer() moves the unread bytes to the start of the
         * buffer and resets buf_index before it calls the backend, so
         * the old index is stale even when nothing was received.
         * Refresh it unconditionally; otherwise a short read at
         * EOF/error would copy from the wrong place.
         */
        index = f->buf_index + offset;
        pending = f->buf_size - index;

        if (received <= 0) {
            break;
        }
    }

    if (pending <= 0) {
        return 0;
    }
    if (size > pending) {
        size = pending;
    }

    memcpy(buf, f->buf + index, size);
    return size;
}
/*
 * Read 'size' bytes of data from the file into buf, consuming them.
 * 'size' may exceed the internal buffer; the data is then fetched in
 * pieces of at most IO_BUF_SIZE bytes.
 *
 * Returns size unless a peek yields no data, in which case it returns
 * the number of bytes read so far (assuming blocking fd's, which all
 * current QEMUFile are).
 */
int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
{
    int total = 0;

    while (total < size) {
        int got = qemu_peek_buffer(f, buf + total,
                                   MIN(size - total, IO_BUF_SIZE), 0);

        if (got == 0) {
            break;
        }
        qemu_file_skip(f, got);
        total += got;
    }

    return total;
}
/*
 * Look at the byte @offset positions past the read pointer of @f
 * without consuming anything.
 *
 * One refill is attempted when that byte is not buffered yet; if it is
 * still unavailable afterwards, 0 is returned.  This is not guaranteed
 * to work when @offset leaves a gap after the data read or peeked
 * previously.
 */
int qemu_peek_byte(QEMUFile *f, int offset)
{
    int index = f->buf_index + offset;

    assert(!qemu_file_is_writable(f));
    assert(offset < IO_BUF_SIZE);

    if (index < f->buf_size) {
        return f->buf[index];
    }

    /* Refilling compacts the buffer, so the index must be recomputed. */
    qemu_fill_buffer(f);
    index = f->buf_index + offset;

    return index < f->buf_size ? f->buf[index] : 0;
}
/*
 * Read and consume one byte from @f.  Returns the value produced by
 * qemu_peek_byte() at offset 0.
 */
int qemu_get_byte(QEMUFile *f)
{
    int byte = qemu_peek_byte(f, 0);

    qemu_file_skip(f, 1);
    return byte;
}
/*
 * Flush @f and return its stream position afterwards.
 */
int64_t qemu_ftell(QEMUFile *f)
{
    int64_t where;

    qemu_fflush(f);
    where = f->pos;

    return where;
}
/*
 * Check the transfer budget of @f.
 *
 * Returns 1 when an error is recorded on the stream, or when a positive
 * transfer limit is set and bytes_xfer exceeds it; returns 0 otherwise.
 */
int qemu_file_rate_limit(QEMUFile *f)
{
    bool over_limit = f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit;

    return (qemu_file_get_error(f) || over_limit) ? 1 : 0;
}
/*
 * Return the transfer limit currently configured on @f.
 */
int64_t qemu_file_get_rate_limit(QEMUFile *f)
{
    int64_t limit = f->xfer_limit;

    return limit;
}
/*
 * Set the transfer limit of @f to @limit.  qemu_file_rate_limit() only
 * enforces limits greater than zero.
 */
void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
{
    QEMUFile *file = f;

    file->xfer_limit = limit;
}
/*
 * Restart rate-limit accounting on @f by zeroing the transferred-bytes
 * counter.
 */
void qemu_file_reset_rate_limit(QEMUFile *f)
{
    QEMUFile *file = f;

    file->bytes_xfer = 0;
}
/*
 * Write the low 16 bits of @v to @f in big-endian byte order.
 */
void qemu_put_be16(QEMUFile *f, unsigned int v)
{
    int shift;

    /* Most significant byte first. */
    for (shift = 8; shift >= 0; shift -= 8) {
        qemu_put_byte(f, v >> shift);
    }
}
/*
 * Write the low 32 bits of @v to @f in big-endian byte order.
 */
void qemu_put_be32(QEMUFile *f, unsigned int v)
{
    int shift;

    /* Most significant byte first. */
    for (shift = 24; shift >= 0; shift -= 8) {
        qemu_put_byte(f, v >> shift);
    }
}
/*
 * Write the 64-bit value @v to @f in big-endian byte order, as two
 * 32-bit halves with the upper half first.
 */
void qemu_put_be64(QEMUFile *f, uint64_t v)
{
    unsigned int upper = v >> 32;
    unsigned int lower = v;

    qemu_put_be32(f, upper);
    qemu_put_be32(f, lower);
}
/*
 * Read two bytes from @f and combine them as a big-endian 16-bit value.
 */
unsigned int qemu_get_be16(QEMUFile *f)
{
    /* Separate statements keep the two reads in stream order. */
    unsigned int high = qemu_get_byte(f) << 8;
    unsigned int low = qemu_get_byte(f);

    return high | low;
}
/*
 * Read four bytes from @f and combine them as a big-endian 32-bit value.
 *
 * Returns the assembled value.
 */
unsigned int qemu_get_be32(QEMUFile *f)
{
    unsigned int v;

    /*
     * qemu_get_byte() returns int; shifting a value >= 0x80 left by 24
     * as a signed int would overflow, which is undefined behaviour.
     * Convert to unsigned before shifting the top byte.
     */
    v = (unsigned int)qemu_get_byte(f) << 24;
    v |= qemu_get_byte(f) << 16;
    v |= qemu_get_byte(f) << 8;
    v |= qemu_get_byte(f);
    return v;
}
/*
 * Read eight bytes from @f and combine them as a big-endian 64-bit
 * value; the first 32-bit word read forms the upper half.
 */
uint64_t qemu_get_be64(QEMUFile *f)
{
    /* Separate statements keep the two reads in stream order. */
    uint64_t upper = qemu_get_be32(f);
    uint64_t lower = qemu_get_be32(f);

    return (upper << 32) | lower;
}
/* Default size, in bytes, of one QEMUSizedBuffer chunk. */
#define QSB_CHUNK_SIZE (1 << 10)
/* Chunk size used instead when a request is larger than this value. */
#define QSB_MAX_CHUNK_SIZE (16 * QSB_CHUNK_SIZE)
/**
 * Create a QEMUSizedBuffer
 * This type of buffer keeps its data in a scatter-gather list of
 * separately allocated chunks.
 *
 * @buffer: Optional data to copy into the new QSB; may be NULL
 * @len: size of the initial buffer; if @buffer is given, it must
 *       hold at least @len bytes
 *
 * All chunks created here have the same size: QSB_MAX_CHUNK_SIZE when
 * @len exceeds that value, QSB_CHUNK_SIZE otherwise.  At least one
 * chunk is always allocated.  The used length is only advanced when
 * @buffer is given.
 *
 * Returns a pointer to a QEMUSizedBuffer or NULL on allocation failure
 */
QEMUSizedBuffer *qsb_create(const uint8_t *buffer, size_t len)
{
    QEMUSizedBuffer *qsb;
    size_t chunk_size, num_chunks, idx;

    chunk_size = QSB_CHUNK_SIZE;
    if (len > QSB_MAX_CHUNK_SIZE) {
        chunk_size = QSB_MAX_CHUNK_SIZE;
    }
    num_chunks = DIV_ROUND_UP(len ? len : QSB_CHUNK_SIZE, chunk_size);

    qsb = g_try_new0(QEMUSizedBuffer, 1);
    if (!qsb) {
        return NULL;
    }

    qsb->iov = g_try_new0(struct iovec, num_chunks);
    if (!qsb->iov) {
        g_free(qsb);
        return NULL;
    }
    qsb->n_iov = num_chunks;

    for (idx = 0; idx < num_chunks; idx++) {
        struct iovec *chunk = &qsb->iov[idx];

        chunk->iov_base = g_try_malloc0(chunk_size);
        if (!chunk->iov_base) {
            /*
             * The iovec array was zero-allocated, so the entries not
             * reached yet are NULL and qsb_free() can release them all.
             */
            qsb_free(qsb);
            return NULL;
        }
        chunk->iov_len = chunk_size;

        if (buffer) {
            size_t remaining = len - qsb->used;
            size_t to_copy = remaining > chunk_size ? chunk_size : remaining;

            memcpy(chunk->iov_base, &buffer[qsb->used], to_copy);
            qsb->used += to_copy;
        }
    }

    qsb->size = num_chunks * chunk_size;
    return qsb;
}
/**
 * Release a QEMUSizedBuffer together with all of its chunks.
 *
 * @qsb: The QEMUSizedBuffer to free; NULL is accepted and ignored
 */
void qsb_free(QEMUSizedBuffer *qsb)
{
    if (qsb) {
        size_t idx = 0;

        while (idx < qsb->n_iov) {
            g_free(qsb->iov[idx].iov_base);
            idx++;
        }
        g_free(qsb->iov);
        g_free(qsb);
    }
}
/**
 * Query how many bytes of the QEMUSizedBuffer are in use
 *
 * @qsb: A QEMUSizedBuffer
 *
 * Returns the number of bytes currently used in this buffer
 */
size_t qsb_get_length(const QEMUSizedBuffer *qsb)
{
    size_t in_use = qsb->used;

    return in_use;
}
/**
 * Set the used length of the buffer; mainly meant for truncating the
 * number of used bytes.  The length is never raised beyond the number
 * of bytes currently allocated in the QEMUSizedBuffer; larger requests
 * are clamped to that size.
 *
 * @qsb: A QEMUSizedBuffer
 * @new_len: The new length of bytes in the buffer
 *
 * Returns the length the buffer actually ended up with.
 */
size_t qsb_set_length(QEMUSizedBuffer *qsb, size_t new_len)
{
    qsb->used = (new_len > qsb->size) ? qsb->size : new_len;

    return qsb->used;
}
/**
 * Locate the chunk that holds the byte at stream position @pos.
 *
 * @qsb: A QEMUSizedBuffer
 * @pos: The index of a byte in the buffer
 * @d_off: Output; set to the offset of that byte inside the chunk
 *         that was found (left untouched on failure)
 *
 * Returns the index of the iovec holding position @pos, or a negative
 * number when @pos lies beyond the used length or no allocated chunk
 * covers it.
 */
static ssize_t qsb_get_iovec(const QEMUSizedBuffer *qsb,
                             off_t pos, off_t *d_off)
{
    off_t chunk_start = 0;
    ssize_t idx = 0;

    if (pos > qsb->used) {
        return -1;
    }

    while (idx < qsb->n_iov) {
        if (chunk_start + qsb->iov[idx].iov_len > pos) {
            *d_off = pos - chunk_start;
            return idx;
        }
        chunk_start += qsb->iov[idx].iov_len;
        idx++;
    }

    return -1;
}
/*
 * Copy part of the QEMUSizedBuffer into a flat buffer.
 *
 * Note: If at all possible, try to avoid this function since it
 * may unnecessarily copy memory around.
 *
 * @qsb: pointer to QEMUSizedBuffer
 * @start: offset to start at
 * @count: number of bytes to copy; clamped to the used bytes that
 *         follow @start
 * @buffer: destination to write into (at least @count bytes)
 *
 * Returns the number of bytes copied into the output buffer; 0 when
 * @start is beyond the used length or no chunk covers it.
 */
ssize_t qsb_get_buffer(const QEMUSizedBuffer *qsb, off_t start,
                       size_t count, uint8_t *buffer)
{
    size_t remaining;
    ssize_t idx;
    off_t src_off;
    uint8_t *out = buffer;

    if (start > qsb->used) {
        return 0;
    }

    /* Never hand out more than what is in use after @start. */
    remaining = qsb->used - start;
    if (count < remaining) {
        remaining = count;
    }
    count = remaining;

    idx = qsb_get_iovec(qsb, start, &src_off);
    if (idx < 0) {
        return 0;
    }

    for (; remaining > 0; idx++) {
        const struct iovec *chunk = &qsb->iov[idx];
        const char *src = chunk->iov_base;
        size_t n = chunk->iov_len - src_off;

        if (n > remaining) {
            n = remaining;
        }
        memcpy(out, &src[src_off], n);

        out += n;
        remaining -= n;
        /* Every chunk after the first is read from its beginning. */
        src_off = 0;
    }

    return count;
}
/**
 * Make sure the QEMUSizedBuffer has at least @new_size bytes allocated,
 * appending freshly zeroed chunks when it does not.
 *
 * @qsb: A QEMUSizedBuffer
 * @new_size: The new size of the buffer
 *
 * On allocation failure @qsb is left unchanged.
 *
 * Return:
 *    -ENOMEM in case of memory allocation failure
 * or
 *    the size of the buffer after the call. The returned size may be
 *    greater or equal to @new_size.
 */
static ssize_t qsb_grow(QEMUSizedBuffer *qsb, size_t new_size)
{
    struct iovec *grown;
    size_t shortfall, chunk_size, extra, first_new, idx;

    if (new_size <= qsb->size) {
        return qsb->size;
    }

    shortfall = new_size - qsb->size;
    chunk_size = QSB_CHUNK_SIZE;
    if (shortfall > QSB_MAX_CHUNK_SIZE) {
        chunk_size = QSB_MAX_CHUNK_SIZE;
    }
    extra = DIV_ROUND_UP(shortfall, chunk_size);
    first_new = qsb->n_iov;

    grown = g_try_new(struct iovec, first_new + extra);
    if (grown == NULL) {
        return -ENOMEM;
    }

    /* Allocate the additional chunks behind the slots of the old ones. */
    for (idx = first_new; idx < first_new + extra; idx++) {
        void *mem = g_try_malloc0(chunk_size);

        if (!mem) {
            /* Undo the chunks allocated so far in this call. */
            while (idx > first_new) {
                idx--;
                g_free(grown[idx].iov_base);
            }
            g_free(grown);
            return -ENOMEM;
        }
        grown[idx].iov_base = mem;
        grown[idx].iov_len = chunk_size;
    }

    /*
     * No allocation can fail past this point: take over the existing
     * entries and switch to the new array.
     */
    for (idx = 0; idx < first_new; idx++) {
        grown[idx] = qsb->iov[idx];
    }
    g_free(qsb->iov);
    qsb->iov = grown;
    qsb->n_iov = first_new + extra;
    qsb->size += extra * chunk_size;

    return qsb->size;
}
/**
 * Copy @count bytes from @source into the QEMUSizedBuffer starting at
 * position @pos.  The buffer is grown automatically when needed, and
 * its used length is raised to @pos + @count if that is larger than
 * the current used length.
 *
 * @qsb: A QEMUSizedBuffer
 * @source: A byte array to copy data from
 * @pos: The position within the @qsb to write data to
 * @count: The number of bytes to copy into the @qsb
 *
 * Returns @count, a negative error code in case of memory allocation
 * failure, or -EINVAL when no chunk can be found for @pos.  Note that
 * the used length is updated before @pos is looked up.
 */
ssize_t qsb_write_at(QEMUSizedBuffer *qsb, const uint8_t *source,
                     off_t pos, size_t count)
{
    ssize_t grown = qsb_grow(qsb, pos + count);
    const uint8_t *src = source;
    size_t remaining = count;
    off_t dst_off;
    ssize_t idx;

    if (grown < 0) {
        return grown;
    }

    if (pos + count > qsb->used) {
        qsb->used = pos + count;
    }

    idx = qsb_get_iovec(qsb, pos, &dst_off);
    if (idx < 0) {
        return -EINVAL;
    }

    for (; remaining > 0; idx++) {
        const struct iovec *chunk = &qsb->iov[idx];
        char *dest = chunk->iov_base;
        size_t n = chunk->iov_len - dst_off;

        if (n > remaining) {
            n = remaining;
        }
        memcpy(&dest[dst_off], src, n);

        src += n;
        remaining -= n;
        /* Every chunk after the first is filled from its beginning. */
        dst_off = 0;
    }

    return count;
}
/**
 * Create a deep copy of the given QEMUSizedBuffer.
 *
 * The copy is created with room for the used length of @qsb and is then
 * filled by writing every chunk of @qsb, in full, one after another.
 *
 * NOTE(review): because whole chunks are written, the used length of
 * the copy ends up as the sum of the source chunk lengths rather than
 * the source's used length - confirm this is what callers expect.
 *
 * @qsb: A QEMUSizedBuffer
 *
 * Returns a clone of @qsb or NULL on allocation failure
 */
QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb)
{
    QEMUSizedBuffer *copy = qsb_create(NULL, qsb_get_length(qsb));
    off_t offset = 0;
    size_t idx;

    if (copy == NULL) {
        return NULL;
    }

    for (idx = 0; idx < qsb->n_iov; idx++) {
        const struct iovec *chunk = &qsb->iov[idx];
        ssize_t written = qsb_write_at(copy, chunk->iov_base,
                                       offset, chunk->iov_len);

        if (written < 0) {
            qsb_free(copy);
            return NULL;
        }
        offset += written;
    }

    return copy;
}
/* Backend state of a QEMUFile whose data lives in a QEMUSizedBuffer. */
typedef struct QEMUBuffer {
QEMUSizedBuffer *qsb; /* backing storage; freed by buf_close() */
QEMUFile *file; /* the QEMUFile created for this buffer by qemu_bufopen() */
} QEMUBuffer;
/*
 * get_buffer callback of the buffer backend: copy up to @size bytes
 * starting at stream position @pos out of the QEMUSizedBuffer into
 * @buf.
 *
 * Returns the number of bytes copied, or 0 when @pos is at or past the
 * used length of the buffer.
 */
static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUBuffer *s = opaque;
    ssize_t avail = qsb_get_length(s->qsb) - pos;

    if (avail <= 0) {
        return 0;
    }

    return qsb_get_buffer(s->qsb, pos, avail > size ? size : avail, buf);
}
static int buf_put_buffer(void *opaque, const uint8_t *buf,
int64_t pos, int size)
{
QEMUBuffer *s = opaque;
return qsb_write_at(s->qsb, buf, pos, size);
}
/*
 * close callback of the buffer backend: free the QEMUSizedBuffer and
 * the QEMUBuffer itself.  Always returns 0.
 */
static int buf_close(void *opaque)
{
    QEMUBuffer *backend = opaque;

    qsb_free(backend->qsb);
    g_free(backend);

    return 0;
}
/*
 * Flush @f and return the QEMUSizedBuffer behind it.
 *
 * The opaque pointer of @f is interpreted as a QEMUBuffer, so @f must
 * be a file created by qemu_bufopen().
 */
const QEMUSizedBuffer *qemu_buf_get(QEMUFile *f)
{
    qemu_fflush(f);

    return ((QEMUBuffer *)f->opaque)->qsb;
}
/* Callbacks for buffer-backed files opened for reading ("r"). */
static const QEMUFileOps buf_read_ops = {
.get_buffer = buf_get_buffer,
.close = buf_close,
};
/* Callbacks for buffer-backed files opened for writing ("w"). */
static const QEMUFileOps buf_write_ops = {
.put_buffer = buf_put_buffer,
.close = buf_close,
};
/*
 * Open a QEMUFile backed by a QEMUSizedBuffer.
 *
 * @mode: exactly "r" (read) or "w" (write)
 * @input: buffer to read from in "r" mode; ignored in "w" mode
 *
 * A fresh, empty QEMUSizedBuffer is created in "w" mode, and in "r"
 * mode when @input is NULL.  The buffer in use, including a caller
 * supplied @input, is freed when the file is closed.
 *
 * Returns the new QEMUFile, or NULL when @mode is invalid or the buffer
 * cannot be created.
 */
QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
{
    QEMUBuffer *s;
    bool reading;

    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') ||
        mode[1] != '\0') {
        error_report("qemu_bufopen: Argument validity check failed");
        return NULL;
    }
    reading = (mode[0] == 'r');

    s = g_malloc0(sizeof(*s));
    s->qsb = reading ? input : NULL;

    if (!s->qsb) {
        s->qsb = qsb_create(NULL, 0);
        if (!s->qsb) {
            g_free(s);
            error_report("qemu_bufopen: qsb_create failed");
            return NULL;
        }
    }

    s->file = qemu_fopen_ops(s, reading ? &buf_read_ops : &buf_write_ops);
    return s->file;
}