From 9e6bdef224f700c057462a7d5e9b4a2770e04569 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 31 Aug 2018 16:53:12 +0200 Subject: util: add qemu_write_pidfile() There are variants of qemu_create_pidfile() in qemu-pr-helper and qemu-ga. Let's have a common implementation in libqemuutil. The code is initially based from pr-helper write_pidfile(), with various improvements and suggestions from Daniel Berrangé: QEMU will leave the pidfile existing on disk when it exits which initially made me think it avoids the deletion race. The app managing QEMU, however, may well delete the pidfile after it has seen QEMU exit, and even if the app locks the pidfile before deleting it, there is still a race. eg consider the following sequence QEMU 1 libvirtd QEMU 2 1. lock(pidfile) 2. exit() 3. open(pidfile) 4. lock(pidfile) 5. open(pidfile) 6. unlink(pidfile) 7. close(pidfile) 8. lock(pidfile) IOW, at step 8 the new QEMU has successfully acquired the lock, but the pidfile no longer exists on disk because it was deleted after the original QEMU exited. While we could just say no external app should ever delete the pidfile, I don't think that is satisfactory as people don't read docs, and admins don't like stale pidfiles being left around on disk. To make this robust, I think we might want to copy libvirt's approach to pidfile acquisition which runs in a loop and checks that the file on disk /after/ acquiring the lock matches the file that was locked. Then we could in fact safely let QEMU delete its own pidfiles on clean exit.. Signed-off-by: Marc-André Lureau Message-Id: <20180831145314.14736-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- util/oslib-posix.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'util/oslib-posix.c') diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 13b6f8d776..0e3ab9d959 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -88,6 +88,74 @@ int qemu_daemon(int nochdir, int noclose) return daemon(nochdir, noclose); } +bool qemu_write_pidfile(const char *path, Error **errp) +{ + int fd; + char pidstr[32]; + + while (1) { + struct stat a, b; + + fd = qemu_open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); + if (fd == -1) { + error_setg_errno(errp, errno, "Cannot open pid file"); + return false; + } + + if (fstat(fd, &b) < 0) { + error_setg_errno(errp, errno, "Cannot stat file"); + goto fail_close; + } + + if (lockf(fd, F_TLOCK, 0) < 0) { + error_setg_errno(errp, errno, "Cannot lock pid file"); + goto fail_close; + } + + /* + * Now make sure the path we locked is the same one that now + * exists on the filesystem. + */ + if (stat(path, &a) < 0) { + /* + * PID file disappeared, someone else must be racing with + * us, so try again. + */ + close(fd); + continue; + } + + if (a.st_ino == b.st_ino) { + break; + } + + /* + * PID file was recreated, someone else must be racing with + * us, so try again. + */ + close(fd); + } + + if (ftruncate(fd, 0) < 0) { + error_setg_errno(errp, errno, "Failed to truncate pid file"); + goto fail_unlink; + } + + snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid()); + if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) { + error_setg(errp, "Failed to write pid file"); + goto fail_unlink; + } + + return true; + +fail_unlink: + unlink(path); +fail_close: + close(fd); + return false; +} + void *qemu_oom_check(void *ptr) { if (ptr == NULL) { -- cgit v1.2.3-55-g7522 From 35f7f3fb5c65dcdf8315bbfd40a3c1d015663d77 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 31 Aug 2018 16:53:13 +0200 Subject: util: use fcntl() for qemu_write_pidfile() locking Daniel Berrangé suggested to use fcntl() locks rather than lockf(). 'man lockf': On Linux, lockf() is just an interface on top of fcntl(2) locking. Many other systems implement lockf() in this way, but note that POSIX.1 leaves the relationship between lockf() and fcntl(2) locks unspecified. A portable application should probably avoid mixing calls to these interfaces. IOW, if its just a shim around fcntl() on many systems, it is clearer if we just use fcntl() directly, as we then know how fcntl() locks will behave if they're on a network filesystem like NFS. Suggested-by: Daniel P. Berrangé Signed-off-by: Marc-André Lureau Message-Id: <20180831145314.14736-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- util/oslib-posix.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'util/oslib-posix.c') diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 0e3ab9d959..fbd0dc8c57 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -95,6 +95,11 @@ bool qemu_write_pidfile(const char *path, Error **errp) while (1) { struct stat a, b; + struct flock lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_len = 0, + }; fd = qemu_open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); if (fd == -1) { @@ -107,7 +112,7 @@ bool qemu_write_pidfile(const char *path, Error **errp) goto fail_close; } - if (lockf(fd, F_TLOCK, 0) < 0) { + if (fcntl(fd, F_SETLK, &lock)) { error_setg_errno(errp, errno, "Cannot lock pid file"); goto fail_close; } -- cgit v1.2.3-55-g7522