summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorDenis V. Lunev2015-05-12 16:30:56 +0200
committerStefan Hajnoczi2015-05-22 10:37:33 +0200
commit459b4e66129d091a11e9886ecc15a8bf9f7f3d92 (patch)
treeff1fbde6cf0d62f8bf2518dd0bbee5507487726e /block
parentblock: minimal bounce buffer alignment (diff)
downloadqemu-459b4e66129d091a11e9886ecc15a8bf9f7f3d92.tar.gz
qemu-459b4e66129d091a11e9886ecc15a8bf9f7f3d92.tar.xz
qemu-459b4e66129d091a11e9886ecc15a8bf9f7f3d92.zip
block: align bounce buffers to page
The following sequence

    int fd = open(argv[1], O_RDWR | O_CREAT | O_DIRECT, 0644);
    for (i = 0; i < 100000; i++)
        write(fd, buf, 4096);

performs 5% better if buf is aligned to 4096 bytes. The difference is
quite reliable.

On the other hand we do not want at the moment to enforce bounce
buffering if guest request is aligned to 512 bytes.

The patch changes default bounce buffer optimal alignment to
MAX(page size, 4k). 4k is chosen as maximal known sector size on real
HDD.

The justification of the performance improvement is quite interesting.
From the kernel point of view each request to the disk was split in
two. This could be seen by blktrace like this:

    9,0   11  1     0.000000000 11151  Q  WS 312737792 + 1023 [qemu-img]
    9,0   11  2     0.000007938 11151  Q  WS 312738815 + 8 [qemu-img]
    9,0   11  3     0.000030735 11151  Q  WS 312738823 + 1016 [qemu-img]
    9,0   11  4     0.000032482 11151  Q  WS 312739839 + 8 [qemu-img]
    9,0   11  5     0.000041379 11151  Q  WS 312739847 + 1016 [qemu-img]
    9,0   11  6     0.000042818 11151  Q  WS 312740863 + 8 [qemu-img]
    9,0   11  7     0.000051236 11151  Q  WS 312740871 + 1017 [qemu-img]
    9,0    5  1     0.169071519 11151  Q  WS 312741888 + 1023 [qemu-img]

After the patch the pattern becomes normal:

    9,0    6  1     0.000000000 12422  Q  WS 314834944 + 1024 [qemu-img]
    9,0    6  2     0.000038527 12422  Q  WS 314835968 + 1024 [qemu-img]
    9,0    6  3     0.000072849 12422  Q  WS 314836992 + 1024 [qemu-img]
    9,0    6  4     0.000106276 12422  Q  WS 314838016 + 1024 [qemu-img]

and the amount of requests sent to disk (could be calculated by counting
the number of lines in the output of blktrace) is reduced about 2 times.

Both qemu-img and qemu-io are affected while qemu-kvm is not. The guest
does its job well and real requests come properly aligned (to page).

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 1431441056-26198-3-git-send-email-den@openvz.org
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/io.c2
-rw-r--r--block/raw-posix.c13
2 files changed, 8 insertions, 7 deletions
diff --git a/block/io.c b/block/io.c
index e6c363921d..b3ea95c74f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -205,7 +205,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
} else {
bs->bl.min_mem_alignment = 512;
- bs->bl.opt_mem_alignment = 512;
+ bs->bl.opt_mem_alignment = getpagesize();
}
if (bs->backing_hd) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 70839245fd..2990e954ae 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -301,6 +301,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
{
BDRVRawState *s = bs->opaque;
char *buf;
+ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
/* For /dev/sg devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
@@ -330,9 +331,9 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
/* If we could not get the sizes so far, we can only guess them */
if (!s->buf_align) {
size_t align;
- buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
- for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
- if (raw_is_io_aligned(fd, buf + align, MAX_BLOCKSIZE)) {
+ buf = qemu_memalign(max_align, 2 * max_align);
+ for (align = 512; align <= max_align; align <<= 1) {
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
s->buf_align = align;
break;
}
@@ -342,8 +343,8 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
if (!bs->request_alignment) {
size_t align;
- buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
- for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ buf = qemu_memalign(s->buf_align, max_align);
+ for (align = 512; align <= max_align; align <<= 1) {
if (raw_is_io_aligned(fd, buf, align)) {
bs->request_alignment = align;
break;
@@ -726,7 +727,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
raw_probe_alignment(bs, s->fd, errp);
bs->bl.min_mem_alignment = s->buf_align;
- bs->bl.opt_mem_alignment = s->buf_align;
+ bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
}
static int check_for_dasd(int fd)