diff options
Diffstat (limited to 'src/server/net.c')
| -rw-r--r-- | src/server/net.c | 280 |
1 files changed, 135 insertions, 145 deletions
diff --git a/src/server/net.c b/src/server/net.c index eb51d29..f2f63b8 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -20,11 +20,13 @@ #include "helper.h" #include "image.h" +#include "iscsi.h" #include "uplink.h" #include "locks.h" #include "rpc.h" #include "altservers.h" #include "reference.h" +#include "sendfile.h" #include <dnbd3/shared/sockhelper.h> #include <dnbd3/shared/timing.h> @@ -32,26 +34,19 @@ #include <dnbd3/shared/serialize.h> #include <assert.h> +#include <netinet/tcp.h> -#ifdef __linux__ -#include <sys/sendfile.h> -#endif -#ifdef __FreeBSD__ -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/uio.h> -#endif #include <jansson.h> #include <inttypes.h> #include <stdatomic.h> #include <signal.h> +#include <dnbd3/afl.h> + static dnbd3_client_t *_clients[SERVER_MAX_CLIENTS]; static int _num_clients = 0; static pthread_mutex_t _clients_lock; -static char nullbytes[500]; - static atomic_uint_fast64_t totalBytesSent = 0; // Adding and removing clients -- list management @@ -63,9 +58,7 @@ static void uplinkCallback(void *data, uint64_t handle, uint64_t start, uint32_t static inline bool recv_request_header(int sock, dnbd3_request_t *request) { ssize_t ret, fails = 0; -#ifdef DNBD3_SERVER_AFL - sock = 0; -#endif + // Read request header from socket while ( ( ret = recv( sock, request, sizeof(*request), MSG_WAITALL ) ) != sizeof(*request) ) { if ( errno == EINTR && ++fails < 10 ) continue; @@ -90,9 +83,6 @@ static inline bool recv_request_header(int sock, dnbd3_request_t *request) static inline bool recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload) { -#ifdef DNBD3_SERVER_AFL - sock = 0; -#endif if ( size == 0 ) { logadd( LOG_ERROR, "Called recv_request_payload() to receive 0 bytes" ); return false; @@ -101,8 +91,9 @@ static inline bool recv_request_payload(int sock, uint32_t size, serialized_buff logadd( LOG_ERROR, "Called recv_request_payload() for more bytes than the passed buffer could hold!" ); return false; } - if ( sock_recv( sock, payload->buffer, size ) != (ssize_t)size ) { - logadd( LOG_DEBUG1, "Could not receive request payload of length %d\n", (int)size ); + const ssize_t ret = sock_recv( sock, payload->buffer, size ); + if ( ret != (ssize_t)size ) { + logadd( LOG_DEBUG1, "Could not receive request payload of length %d (got %d, errno %d)\n", (int)size, (int)ret, errno ); return false; } // Prepare payload buffer for reading @@ -111,60 +102,82 @@ static inline bool recv_request_payload(int sock, uint32_t size, serialized_buff } /** - * Send reply with optional payload. payload can be null. The caller has to - * acquire the sendMutex first. + * Send reply with optional payload. payload can be null. */ -static inline bool send_reply(int sock, dnbd3_reply_t *reply, const void *payload) +static bool send_reply(dnbd3_client_t *client, dnbd3_reply_t *reply, const void *payload, const bool lock) { - const uint32_t size = reply->size; + const uint32_t size = reply->size; // Copy because of fixup_reply() + fixup_reply( *reply ); - if ( sock_sendAll( sock, reply, sizeof(dnbd3_reply_t), 1 ) != sizeof(dnbd3_reply_t) ) { + if ( lock ) { + mutex_lock( &client->sendMutex ); + } + if ( sock_sendAll( client->sock, reply, sizeof(dnbd3_reply_t), 1 ) != sizeof(dnbd3_reply_t) ) { + if ( lock ) { + mutex_unlock( &client->sendMutex ); + } logadd( LOG_DEBUG1, "Sending reply header to client failed" ); return false; } if ( size != 0 && payload != NULL ) { - if ( sock_sendAll( sock, payload, size, 1 ) != (ssize_t)size ) { + if ( sock_sendAll( client->sock, payload, size, 1 ) != (ssize_t)size ) { + if ( lock ) { + mutex_unlock( &client->sendMutex ); + } logadd( LOG_DEBUG1, "Sending payload of %"PRIu32" bytes to client failed", size ); return false; } } + if ( lock ) { + mutex_unlock( &client->sendMutex ); + } return true; } -/** - * Send given amount of null bytes. The caller has to acquire the sendMutex first. - */ -static inline bool sendPadding( const int fd, uint32_t bytes ) +void net_init() { - ssize_t ret; - while ( bytes >= sizeof(nullbytes) ) { - ret = sock_sendAll( fd, nullbytes, sizeof(nullbytes), 2 ); - if ( ret <= 0 ) - return false; - bytes -= (uint32_t)ret; - } - return sock_sendAll( fd, nullbytes, bytes, 2 ) == (ssize_t)bytes; + mutex_init( &_clients_lock, LOCK_CLIENT_LIST ); } -void net_init() +void initClientStruct(dnbd3_client_t *client) { - mutex_init( &_clients_lock, LOCK_CLIENT_LIST ); + mutex_init( &client->lock, LOCK_CLIENT ); + mutex_init( &client->sendMutex, LOCK_CLIENT_SEND ); + + mutex_lock( &client->lock ); + host_to_string( &client->host, client->hostName, HOSTNAMELEN ); + client->hostName[HOSTNAMELEN-1] = '\0'; + mutex_unlock( &client->lock ); + client->bytesSent = 0; + client->relayedCount = 0; } void* net_handleNewConnection(void *clientPtr) { dnbd3_client_t * const client = (dnbd3_client_t *)clientPtr; dnbd3_request_t request; + dnbd3_cache_map_t *cache = NULL; client->thread = pthread_self(); // Await data from client. Since this is a fresh connection, we expect data right away sock_setTimeout( client->sock, _clientTimeout ); + // NODELAY makes sense since we're sending a lot of data + int e2 = 1; + socklen_t optlen = sizeof(e2); + setsockopt( client->sock, IPPROTO_TCP, TCP_NODELAY, (void *)&e2, optlen ); + // Also increase send buffer + if ( getsockopt( client->sock, SOL_SOCKET, SO_SNDBUF, (void *)&e2, &optlen ) == 0 ) { +#ifdef __linux__ + // Linux doubles the value to account for overhead, get "real" value + e2 /= 2; +#endif + if ( e2 < SERVER_TCP_BUFFER_MIN_SIZE_PAYLOAD ) { + e2 = SERVER_TCP_BUFFER_MIN_SIZE_PAYLOAD; + setsockopt( client->sock, SOL_SOCKET, SO_SNDBUF, &e2, sizeof(e2) ); + } + } do { -#ifdef DNBD3_SERVER_AFL - const int ret = (int)recv( 0, &request, sizeof(request), MSG_WAITALL ); -#else const int ret = (int)recv( client->sock, &request, sizeof(request), MSG_WAITALL ); -#endif // It's expected to be a real dnbd3 client // Check request for validity. This implicitly dictates that all HTTP requests are more than 24 bytes... if ( ret != (int)sizeof(request) ) { @@ -177,8 +190,21 @@ void* net_handleNewConnection(void *clientPtr) if ( ((char*)&request)[0] == 'G' || ((char*)&request)[0] == 'P' ) { // Close enough... rpc_sendStatsJson( client->sock, &client->host, &request, ret ); + } else if ( ((char*)&request)[0] == 0x43 ) { // Login opcode 0x03 + immediate bit (0x40) set + if ( !_iScsiServer ) { + logadd( LOG_INFO, "Received iSCSI login request from %s, but iSCSI server is not enabled", client->hostName ); + } else { + initClientStruct( client ); + if ( !addToList( client ) ) { + freeClientStruct( client ); + logadd( LOG_WARNING, "Could not add new iSCSI client to list when connecting" ); + } else { + iscsi_connection_handle( client, &request, ret ); + goto exit_client_cleanup; + } + } } else { - logadd( LOG_DEBUG1, "Magic in client handshake incorrect" ); + logadd( LOG_DEBUG1, "Magic in client handshake unknown" ); } goto fail_preadd; } @@ -190,26 +216,17 @@ void* net_handleNewConnection(void *clientPtr) } } while (0); // Fully init client struct - mutex_init( &client->lock, LOCK_CLIENT ); - mutex_init( &client->sendMutex, LOCK_CLIENT_SEND ); - - mutex_lock( &client->lock ); - host_to_string( &client->host, client->hostName, HOSTNAMELEN ); - client->hostName[HOSTNAMELEN-1] = '\0'; - mutex_unlock( &client->lock ); - client->bytesSent = 0; - client->relayedCount = 0; + initClientStruct( client ); if ( !addToList( client ) ) { freeClientStruct( client ); - logadd( LOG_WARNING, "Could not add new client to list when connecting" ); - return NULL; + logadd( LOG_WARNING, "Could not add new DNBD3 client to list when connecting" ); + goto fail_preadd; } dnbd3_reply_t reply; dnbd3_image_t *image = NULL; - dnbd3_cache_map_t *cache = NULL; int image_file = -1; int num; @@ -300,7 +317,7 @@ void* net_handleNewConnection(void *clientPtr) serializer_put_uint64( &payload, image->virtualFilesize ); reply.cmd = CMD_SELECT_IMAGE; reply.size = serializer_get_written_length( &payload ); - if ( !send_reply( client->sock, &reply, &payload ) ) { + if ( !send_reply( client, &reply, &payload, false ) ) { bOk = false; } } @@ -319,7 +336,8 @@ void* net_handleNewConnection(void *clientPtr) while ( recv_request_header( client->sock, &request ) ) { if ( _shutdown ) break; if ( likely ( request.cmd == CMD_GET_BLOCK ) ) { - + // since the relayed count can only increase in this very loop, it is safe to check this here once + const bool lock = client->relayedCount > 0; const uint64_t offset = request.offset_small; // Copy to full uint64 to prevent repeated masking reply.handle = request.handle; if ( unlikely( offset >= image->virtualFilesize ) ) { @@ -327,7 +345,7 @@ void* net_handleNewConnection(void *clientPtr) logadd( LOG_WARNING, "Client %s requested non-existent block", client->hostName ); reply.size = 0; reply.cmd = CMD_ERROR; - send_reply( client->sock, &reply, NULL ); + send_reply( client, &reply, NULL, lock ); continue; } if ( unlikely( offset + request.size > image->virtualFilesize ) ) { @@ -335,7 +353,17 @@ void* net_handleNewConnection(void *clientPtr) logadd( LOG_WARNING, "Client %s requested data block that extends beyond image size", client->hostName ); reply.size = 0; reply.cmd = CMD_ERROR; - send_reply( client->sock, &reply, NULL ); + send_reply( client, &reply, NULL, lock ); + continue; + } + if ( unlikely( offset >= image->realFilesize ) ) { + // Shortcut - only virtual bytes (padding) + reply.cmd = CMD_GET_BLOCK; + reply.size = request.size; + if ( lock ) mutex_lock( &client->sendMutex ); + send_reply( client, &reply, NULL, false ); + sock_sendPadding( client->sock, request.size ); + if ( lock ) mutex_unlock( &client->sendMutex ); continue; } @@ -373,78 +401,36 @@ void* net_handleNewConnection(void *clientPtr) reply.size = request.size; fixup_reply( reply ); - const bool lock = image->uplinkref != NULL; if ( lock ) mutex_lock( &client->sendMutex ); // Send reply header if ( send( client->sock, &reply, sizeof(dnbd3_reply_t), (request.size == 0 ? 0 : MSG_MORE) ) != sizeof(dnbd3_reply_t) ) { + logadd( LOG_DEBUG1, "Sending CMD_GET_BLOCK reply header to %s failed (errno=%d)", client->hostName, errno ); if ( lock ) mutex_unlock( &client->sendMutex ); - logadd( LOG_DEBUG1, "Sending CMD_GET_BLOCK reply header to %s failed", client->hostName ); goto exit_client_cleanup; } - if ( request.size != 0 ) { - // Send payload if request length > 0 - size_t done = 0; - off_t foffset = (off_t)offset; - size_t realBytes; - if ( offset + request.size <= image->realFilesize ) { - realBytes = request.size; - } else { - realBytes = (size_t)(image->realFilesize - offset); + const size_t realBytes = offset + request.size <= image->realFilesize + ? request.size : (image->realFilesize - offset); + bool ret = sendfile_all( image_file, client->sock, offset, realBytes ); + if ( !ret ) { + const int err = errno; + + if ( lock ) mutex_unlock( &client->sendMutex ); + if ( err != EPIPE && err != ECONNRESET && err != ESHUTDOWN + && err != EAGAIN && err != EWOULDBLOCK ) { + logadd( LOG_DEBUG1, "sendfile to %s failed (%d bytes, errno=%d)", + client->hostName, (int)realBytes, err ); } - while ( done < realBytes ) { - // TODO: Should we consider EOPNOTSUPP on BSD for sendfile and fallback to read/write? - // Linux would set EINVAL or ENOSYS instead, which it unfortunately also does for a couple of other failures :/ - // read/write would kill performance anyways so a fallback would probably be of little use either way. -#ifdef DNBD3_SERVER_AFL - char buf[1000]; - size_t cnt = realBytes - done; - if ( cnt > 1000 ) { - cnt = 1000; - } - const ssize_t sent = pread( image_file, buf, cnt, foffset ); - if ( sent > 0 ) { - //write( client->sock, buf, sent ); // This is not verified in any way, so why even do it... - } else { - const int err = errno; -#elif defined(__linux__) - const ssize_t sent = sendfile( client->sock, image_file, &foffset, realBytes - done ); - if ( sent <= 0 ) { - const int err = errno; -#elif defined(__FreeBSD__) - off_t sent; - const int ret = sendfile( image_file, client->sock, foffset, realBytes - done, NULL, &sent, 0 ); - if ( ret == -1 || sent == 0 ) { - const int err = errno; - if ( ret == -1 ) { - if ( err == EAGAIN || err == EINTR ) { // EBUSY? manpage doesn't explicitly mention *sent here.. But then again we dont set the according flag anyways - done += sent; - continue; - } - sent = -1; - } -#endif - if ( lock ) mutex_unlock( &client->sendMutex ); - if ( sent == -1 ) { - if ( err != EPIPE && err != ECONNRESET && err != ESHUTDOWN - && err != EAGAIN && err != EWOULDBLOCK ) { - logadd( LOG_DEBUG1, "sendfile to %s failed (image to net. sent %d/%d, errno=%d)", - client->hostName, (int)done, (int)realBytes, err ); - } - if ( err == EBADF || err == EFAULT || err == EINVAL || err == EIO ) { - logadd( LOG_INFO, "Disabling %s:%d", image->name, image->rid ); - image->problem.read = true; - } - } - goto exit_client_cleanup; - } - done += sent; + if ( err == EBADF || err == EFAULT || err == EINVAL || err == EIO ) { + logadd( LOG_INFO, "Disabling %s:%d", image->name, image->rid ); + image->problem.read = true; } - if ( request.size > (uint32_t)realBytes ) { - if ( !sendPadding( client->sock, request.size - (uint32_t)realBytes ) ) { - if ( lock ) mutex_unlock( &client->sendMutex ); - goto exit_client_cleanup; - } + goto exit_client_cleanup; + } + if ( request.size > (uint32_t)realBytes ) { + if ( !sock_sendPadding( client->sock, request.size - (uint32_t)realBytes ) ) { + if ( lock ) mutex_unlock( &client->sendMutex ); + goto exit_client_cleanup; } } if ( lock ) mutex_unlock( &client->sendMutex ); @@ -466,22 +452,18 @@ void* net_handleNewConnection(void *clientPtr) num = altservers_getListForClient( client, server_list, NUMBER_SERVERS ); reply.cmd = CMD_GET_SERVERS; reply.size = (uint32_t)( num * sizeof(dnbd3_server_entry_t) ); - mutex_lock( &client->sendMutex ); - send_reply( client->sock, &reply, server_list ); - mutex_unlock( &client->sendMutex ); - goto set_name; + if ( !send_reply( client, &reply, server_list, true ) ) { + logadd( LOG_DEBUG1, "Sending CMD_GET_SERVERS reply to %s failed.", client->hostName ); + goto exit_client_cleanup; + } break; case CMD_KEEPALIVE: reply.cmd = CMD_KEEPALIVE; reply.size = 0; - mutex_lock( &client->sendMutex ); - send_reply( client->sock, &reply, NULL ); - mutex_unlock( &client->sendMutex ); -set_name: ; - if ( !hasName ) { - hasName = true; - setThreadName( client->hostName ); + if ( !send_reply( client, &reply, NULL, true ) ) { + logadd( LOG_DEBUG1, "Sending CMD_KEEPALIVE reply to %s failed.", client->hostName ); + goto exit_client_cleanup; } break; @@ -494,14 +476,18 @@ set_name: ; mutex_lock( &client->sendMutex ); if ( image->crc32 == NULL ) { reply.size = 0; - send_reply( client->sock, &reply, NULL ); + bOk = send_reply( client, &reply, NULL, false ); } else { const uint32_t size = reply.size = (uint32_t)( (IMGSIZE_TO_HASHBLOCKS(image->realFilesize) + 1) * sizeof(uint32_t) ); - send_reply( client->sock, &reply, NULL ); - send( client->sock, &image->masterCrc32, sizeof(uint32_t), MSG_MORE ); - send( client->sock, image->crc32, size - sizeof(uint32_t), 0 ); + bOk = send_reply( client, &reply, NULL, false ); + bOk = bOk && send( client->sock, &image->masterCrc32, sizeof(uint32_t), MSG_MORE ) == sizeof(uint32_t); + bOk = bOk && send( client->sock, image->crc32, size - sizeof(uint32_t), 0 ) == size - sizeof(uint32_t); } mutex_unlock( &client->sendMutex ); + if ( !bOk ) { + logadd( LOG_DEBUG1, "Sending CMD_GET_CRC32 reply to %s failed.", client->hostName ); + goto exit_client_cleanup; + } break; default: @@ -509,6 +495,10 @@ set_name: ; break; } // end switch + if ( !hasName ) { + hasName = true; + setThreadName( client->hostName ); + } } // end loop } // end bOk exit_client_cleanup: ; @@ -516,11 +506,11 @@ exit_client_cleanup: ; removeFromList( client ); totalBytesSent += client->bytesSent; // Access time, but only if client didn't just probe - if ( image != NULL && client->bytesSent > DNBD3_BLOCK_SIZE * 10 ) { - mutex_lock( &image->lock ); - timing_get( &image->atime ); - image->accessed = true; - mutex_unlock( &image->lock ); + if ( client->image != NULL && client->bytesSent > DNBD3_BLOCK_SIZE * 10 ) { + mutex_lock( &client->image->lock ); + timing_get( &client->image->atime ); + client->image->accessed = true; + mutex_unlock( &client->image->lock ); } if ( cache != NULL ) { ref_put( &cache->reference ); @@ -686,7 +676,7 @@ static dnbd3_client_t* freeClientStruct(dnbd3_client_t *client) dnbd3_uplink_t *uplink = ref_get_uplink( &client->image->uplinkref ); if ( uplink != NULL ) { if ( client->relayedCount != 0 ) { - uplink_removeEntry( uplink, client, &uplinkCallback ); + uplink_removeEntry( uplink, client ); } ref_put( &uplink->reference ); } @@ -751,11 +741,11 @@ static void uplinkCallback(void *data, uint64_t handle, uint64_t start UNUSED, u .size = length, }; mutex_lock( &client->sendMutex ); - send_reply( client->sock, &reply, buffer ); + send_reply( client, &reply, buffer, false ); if ( buffer == NULL ) { shutdown( client->sock, SHUT_RDWR ); } - client->relayedCount--; mutex_unlock( &client->sendMutex ); + client->relayedCount--; } |
