summaryrefslogtreecommitdiffstats
path: root/src/server/globals.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/globals.h')
-rw-r--r--src/server/globals.h192
1 files changed, 133 insertions, 59 deletions
diff --git a/src/server/globals.h b/src/server/globals.h
index b248800..bde1184 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -1,101 +1,133 @@
#ifndef _GLOBALS_H_
#define _GLOBALS_H_
-#include "../types.h"
-#include "../shared/fdsignal.h"
-#include "../serverconfig.h"
+#include <dnbd3/types.h>
+#include <dnbd3/shared/fdsignal.h>
+#include <dnbd3/config/server.h>
#include <stdint.h>
#include <stdatomic.h>
#include <time.h>
#include <pthread.h>
+#include "reftypes.h"
typedef struct timespec ticks;
// ######### All structs/types used by the server ########
-typedef struct _dnbd3_connection dnbd3_connection_t;
+typedef struct _dnbd3_uplink dnbd3_uplink_t;
typedef struct _dnbd3_image dnbd3_image_t;
typedef struct _dnbd3_client dnbd3_client_t;
-// Slot is free, can be used.
-// Must only be set in uplink_handle_receive() or uplink_remove_client()
-#define ULR_FREE 0
-// Slot has been filled with a request that hasn't been sent to the upstream server yet, matching request can safely rely on reuse.
-// Must only be set in uplink_request()
-#define ULR_NEW 1
-// Slot is occupied, reply has not yet been received, matching request can safely rely on reuse.
-// Must only be set in uplink_mainloop() or uplink_request()
-#define ULR_PENDING 2
-// Slot is being processed, do not consider for hop on.
-// Must only be set in uplink_handle_receive()
-#define ULR_PROCESSING 3
-typedef struct
+typedef void (*uplink_callback)(void *data, uint64_t handle, uint64_t start, uint32_t length, const char *buffer);
+
+typedef struct _dnbd3_queue_client
+{
+ struct _dnbd3_queue_client *next; // Next client in the linked list (see .clients in the queue entry struct)
+ void* data; // Passed back to callback
+ uint64_t handle; // Passed back to callback
+ uint64_t from, to; // Client range; NOTE(review): presumably byte offsets with 'to' exclusive, like the queue entry - confirm
+ uplink_callback callback; // Callback function, called as (data, handle, start, length, buffer)
+} dnbd3_queue_client_t;
+
+typedef struct _dnbd3_queue_entry
{
- uint64_t handle; // Client defined handle to pass back in reply
- uint64_t from; // First byte offset of requested block (ie. 4096)
- uint64_t to; // Last byte + 1 of requested block (ie. 8192, if request len is 4096, resulting in bytes 4096-8191)
- dnbd3_client_t * client; // Client to send reply to
- int status; // status of this entry: ULR_*
-#ifdef _DEBUG
- ticks entered; // When this request entered the queue (for debugging)
+ struct _dnbd3_queue_entry *next;
+ uint64_t handle; // Our handle for this entry
+ uint64_t from; // First byte offset of requested block (ie. 4096)
+ uint64_t to; // Last byte + 1 of requested block (ie. 8192, if request len is 4096, resulting in bytes 4096-8191)
+ dnbd3_queue_client_t *clients;
+#ifdef DEBUG
+ ticks entered; // When this request entered the queue (for debugging)
#endif
- uint8_t hopCount; // How many hops this request has already taken across proxies
-} dnbd3_queued_request_t;
+ uint8_t hopCount; // How many hops this request has already taken across proxies
+ bool sent; // Already sent to uplink?
+} dnbd3_queue_entry_t;
+
+typedef struct _ns
+{
+ struct _ns *next; // Next node; alt servers keep these as a linked list of name spaces
+ char *name; // NOTE(review): presumably the name space string; ownership not visible here - confirm
+ size_t len; // NOTE(review): presumably the length of name - confirm whether a terminator is counted
+} dnbd3_ns_t;
+
+typedef struct
+{
+ int fails; // Hard fail: Connection failed
+ int rttIndex;
+ uint32_t rtt[SERVER_RTT_PROBES];
+ bool isPrivate, isClientOnly;
+ bool blocked; // If true count down fails until 0 to enable again
+ ticks lastFail; // Last hard fail
+ dnbd3_host_t host;
+ char comment[COMMENT_LENGTH];
+ _Atomic(dnbd3_ns_t *) nameSpaces; // Linked list of name spaces
+} dnbd3_alt_server_t;
+
+typedef struct
+{
+ int fails; // Soft fail: Image not found
+ int rttIndex;
+ uint32_t rtt[SERVER_RTT_PROBES];
+ bool blocked; // True if server is to be ignored and fails should be counted down
+ bool initDone;
+} dnbd3_alt_local_t;
+
+typedef struct {
+ int fd; // Socket fd for this connection
+ int version; // Protocol version of remote server
+ int index; // Entry in uplinks list
+} dnbd3_server_connection_t;
#define RTT_IDLE 0 // Not in progress
#define RTT_INPROGRESS 1 // In progress, not finished
#define RTT_DONTCHANGE 2 // Finished, but no better alternative found
#define RTT_DOCHANGE 3 // Finished, better alternative found. NOTE(review): .betterServer + .betterFd were replaced by .better in the uplink struct; presumably written there - confirm
#define RTT_NOT_REACHABLE 4 // No uplink was reachable
-struct _dnbd3_connection
+struct _dnbd3_uplink
{
- int fd; // socket fd to remote server
- int version; // remote server protocol version
+ ref reference; // NOTE(review): presumably the reference-counting handle declared in reftypes.h - confirm
+ dnbd3_server_connection_t current; // Currently active connection; fd == -1 means disconnected
+ dnbd3_server_connection_t better; // Better connection as found by altserver worker; fd == -1 means none
dnbd3_signal_t* signal; // used to wake up the process
pthread_t thread; // thread holding the connection
pthread_mutex_t sendMutex; // For locking socket while sending
pthread_mutex_t queueLock; // lock for synchronization on request queue etc.
dnbd3_image_t *image; // image that this uplink is used for; do not call get/release for this pointer
- dnbd3_host_t currentServer; // Current server we're connected to
pthread_mutex_t rttLock; // When accessing rttTestResult or the better connection. NOTE(review): betterFd/betterServer became .better; presumably still guarded by this lock - confirm
- int rttTestResult; // RTT_*
+ atomic_int rttTestResult; // RTT_*
int cacheFd; // used to write to the image, in case it is relayed. ONLY USE FROM UPLINK THREAD!
- int betterVersion; // protocol version of better server
- int betterFd; // Active connection to better server, ready to use
- dnbd3_host_t betterServer; // The better server
uint8_t *recvBuffer; // Buffer for receiving payload
uint32_t recvBufferLen; // Len of ^^
- volatile bool shutdown; // signal this thread to stop, must only be set from uplink_shutdown() or cleanup in uplink_mainloop()
+ atomic_bool shutdown; // signal this thread to stop, must only be set from uplink_shutdown() or cleanup in uplink_mainloop()
bool replicatedLastBlock; // bool telling if the last block has been replicated yet
bool cycleDetected; // connection cycle between proxies detected for current remote server
int nextReplicationIndex; // Which index in the cache map we should start looking for incomplete blocks at
// If BGR == BGR_HASHBLOCK, -1 means "currently no incomplete block"
- uint64_t replicationHandle; // Handle of pending replication request
atomic_uint_fast64_t bytesReceived; // Number of bytes received by the uplink since startup.
+ atomic_uint_fast64_t bytesReceivedLastSave; // Number of bytes received when we last saved the cache map
int queueLen; // length of queue
- uint32_t idleTime; // How many seconds the uplink was idle (apart from keep-alives)
- dnbd3_queued_request_t queue[SERVER_MAX_UPLINK_QUEUE];
+ int idleTime; // How many seconds the uplink was idle (apart from keep-alives)
+ dnbd3_queue_entry_t *queue; // Head of the linked list of queued requests (replaces the fixed-size array)
+ atomic_uint_fast32_t queueId; // NOTE(review): presumably the source of handle values for new queue entries - confirm
+ dnbd3_alt_local_t altData[SERVER_MAX_ALTS]; // Per-uplink alt server state; NOTE(review): presumably indexed like the global alt server list - confirm
};
typedef struct
{
- char comment[COMMENT_LENGTH];
- dnbd3_host_t host;
- unsigned int rtt[SERVER_RTT_PROBES];
- unsigned int rttIndex;
- bool isPrivate, isClientOnly;
- ticks lastFail;
- int numFails;
-} dnbd3_alt_server_t;
-
-typedef struct
-{
uint8_t host[16];
int bytes;
int bitMask;
int permissions;
} dnbd3_access_rule_t;
+typedef struct
+{
+ ref reference; // NOTE(review): presumably the reference-counting handle declared in reftypes.h - confirm
+ atomic_bool dirty; // Cache map has been modified outside uplink (only integrity checker for now)
+ bool unchanged; // How many times in a row a reloaded cache map went unchanged. NOTE(review): described as a count but declared bool - confirm intended type
+ _Atomic uint8_t map[]; // Flexible array member holding the cache map bytes; length is not stored in this struct
+} dnbd3_cache_map_t;
+
/**
* Image struct. An image path could be something like
* /mnt/images/rz/zfs/Windows7 ZfS.vmdk.r1
@@ -106,35 +138,44 @@ struct _dnbd3_image
{
char *path; // absolute path of the image
char *name; // public name of the image (usually relative path minus revision ID)
- dnbd3_connection_t *uplink; // pointer to a server connection
- uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete
+ weakref uplinkref; // pointer to a server connection
+ weakref ref_cacheMap; // cache map telling which parts are locally cached, NULL if complete
uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k)
uint64_t realFilesize; // actual file size on disk
ticks atime; // last access time
- ticks lastWorkCheck; // last time a non-working image has been checked
ticks nextCompletenessEstimate; // next time the completeness estimate should be updated
uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image
uint32_t masterCrc32; // CRC-32 of the crc-32 list
int readFd; // used to read the image. Used from multiple threads, so use atomic operations (pread et al)
- int completenessEstimate; // Completeness estimate in percent
- int users; // clients currently using this image
+ atomic_int completenessEstimate; // Completeness estimate in percent
+ atomic_int users; // clients currently using this image. XXX Lock on imageListLock when modifying and checking whether the image should be freed. Reading it elsewhere is fine without the lock.
int id; // Unique ID of this image. Only unique in the context of this running instance of DNBD3-Server
- bool working; // true if image exists and completeness is == 100% or a working upstream proxy is connected
+ struct {
+ atomic_bool read; // Error reading from file
+ atomic_bool write; // Error writing to file
+ atomic_bool changed; // File disappeared or changed, thorough check required if it seems to be back
+ atomic_bool uplink; // No uplink connected
+ atomic_bool queue; // Too many requests waiting on uplink
+ } problem;
uint16_t rid; // revision of image
+ bool accessed; // image was accessed since .meta was written
pthread_mutex_t lock;
};
+#define PIMG(x) (x)->name, (int)(x)->rid // Expands to TWO comma-separated expressions (name, rid as int) and evaluates x twice; presumably meant as printf-style varargs - confirm
struct _dnbd3_client
{
#define HOSTNAMELEN (48)
atomic_uint_fast64_t bytesSent; // Byte counter for this client.
- dnbd3_image_t *image; // Image in use by this client, or NULL during handshake
+ dnbd3_image_t * _Atomic image; // Image in use by this client, or NULL during handshake
int sock;
+ _Atomic uint8_t relayedCount; // How many requests are in-flight to the uplink server
bool isServer; // true if a server in proxy mode, false if real client
dnbd3_host_t host;
char hostName[HOSTNAMELEN]; // inet_ntop version of host
pthread_mutex_t sendMutex; // Held while writing to sock if image is incomplete (since uplink uses socket too)
pthread_mutex_t lock;
+ pthread_t thread;
};
// #######################################################
@@ -188,12 +229,12 @@ extern atomic_bool _removeMissingImages;
/**
* Read timeout when waiting for or sending data on an uplink
*/
-extern atomic_int _uplinkTimeout;
+extern atomic_uint _uplinkTimeout;
/**
* Read timeout when waiting for or sending data from/to client
*/
-extern atomic_int _clientTimeout;
+extern atomic_uint _clientTimeout;
/**
* If true, images with no active client will have their fd closed after some
@@ -216,6 +257,11 @@ extern atomic_int _backgroundReplication;
extern atomic_int _bgrMinClients;
/**
+ * How many in-flight replication requests we should target (per uplink)
+ */
+extern atomic_int _bgrWindowSize;
+
+/**
* (In proxy mode): If connecting client is a proxy, and the requested image
* is not known locally, should we ask our known alt servers for it?
* Otherwise the request is rejected.
@@ -237,6 +283,12 @@ extern atomic_bool _lookupMissingForProxy;
extern atomic_bool _sparseFiles;
/**
+ * If true, don't abort image replication if preallocating
+ * the image fails, but retry with sparse file.
+ */
+extern atomic_bool _ignoreAllocErrors;
+
+/**
* Port to listen on (default: #define PORT (5003))
*/
extern atomic_int _listenPort;
@@ -257,7 +309,7 @@ extern atomic_int _maxImages;
* Usually this isn't even a megabyte for "real" clients (blockdev
* or fuse).
*/
-extern atomic_int _maxPayload;
+extern atomic_uint _maxPayload;
/**
* If in proxy mode, don't replicate images that are
@@ -273,6 +325,28 @@ extern atomic_uint_fast64_t _maxReplicationSize;
extern atomic_bool _pretendClient;
/**
+ * Minimum uptime in seconds before proxy starts deleting old
+ * images if running out of space. -1 disables automatic deletion.
+ * Only relevant in proxy mode.
+ */
+extern atomic_int _autoFreeDiskSpaceDelay;
+
+/**
+ * When handling a client request, this sets the maximum number
+ * of bytes we prefetch, starting right at the end of the client request.
+ * The prefetch size will be MIN( length * 3, _maxPrefetch ) if
+ * length <= _maxPrefetch, so effectively, setting this to 0 disables
+ * any prefetching.
+ */
+extern atomic_uint _maxPrefetch;
+
+/**
+ * Use with care. Can severely degrade performance.
+ * Set to either 0 or a very high value.
+ */
+extern atomic_uint _minRequestSize;
+
+/**
* Load the server configuration.
*/
void globals_loadConfig();