From 926754534985c5ffbb277e5abf40cf9aa72b9fff Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 15 Aug 2019 14:25:28 +0200 Subject: [SHARED] Better errno handling in connect() helper --- src/shared/sockhelper.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/shared') diff --git a/src/shared/sockhelper.c b/src/shared/sockhelper.c index ab34aa1..ec80659 100644 --- a/src/shared/sockhelper.c +++ b/src/shared/sockhelper.c @@ -46,6 +46,7 @@ int sock_connect(const dnbd3_host_t * const addr, const int connect_ms, const in #endif else { logadd( LOG_DEBUG1, "Unsupported address type: %d\n", (int)addr->type ); + errno = EAFNOSUPPORT; return -1; } int client_sock = socket( proto, SOCK_STREAM, IPPROTO_TCP ); @@ -56,8 +57,10 @@ int sock_connect(const dnbd3_host_t * const addr, const int connect_ms, const in } else { sock_setTimeout( client_sock, connect_ms ); } + int e2; for ( int i = 0; i < 5; ++i ) { int ret = connect( client_sock, (struct sockaddr *)&ss, addrlen ); + e2 = errno; if ( ret != -1 || errno == EINPROGRESS || errno == EISCONN ) break; if ( errno == EINTR ) { // http://www.madore.org/~david/computers/connect-intr.html @@ -67,21 +70,26 @@ int sock_connect(const dnbd3_host_t * const addr, const int connect_ms, const in struct pollfd unix_really_sucks = { .fd = client_sock, .events = POLLOUT | POLLIN }; while ( i-- > 0 ) { int pr = poll( &unix_really_sucks, 1, connect_ms == 0 ? -1 : connect_ms ); + e2 = errno; if ( pr == 1 && ( unix_really_sucks.revents & POLLOUT ) ) break; if ( pr == -1 && errno == EINTR ) continue; close( client_sock ); + errno = e2; return -1; } sockaddr_storage junk; socklen_t more_junk = sizeof(junk); if ( getpeername( client_sock, (struct sockaddr*)&junk, &more_junk ) == -1 ) { + e2 = errno; close( client_sock ); + errno = e2; return -1; } break; #endif } // EINTR close( client_sock ); + errno = e2; return -1; } if ( connect_ms != -1 && connect_ms != rw_ms ) { -- cgit v1.2.3-55-g7522 From 645bb4b91b06c0eb23867aab1511b080ce122d96 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Fri, 30 Aug 2019 09:46:53 +0200 Subject: [SERVER] Introduce debug spam --- src/server/uplink.c | 16 ++++++++-------- src/shared/timing.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/shared') diff --git a/src/server/uplink.c b/src/server/uplink.c index 52cf417..4cea7e2 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -472,11 +472,10 @@ static void* uplink_mainloop(void *data) waitTime = uplink->rttTestResult == RTT_DOCHANGE ? 0 : -1; if ( waitTime == 0 ) { // 0 means poll, since we're about to change the server - } else if ( uplink->current.fd == -1 ) { - waitTime = 1000; } else { declare_now; waitTime = (int)timing_diffMs( &now, &nextAltCheck ); + logadd( LOG_DEBUG1, "Next %d for %s", waitTime / 1000, uplink->image->name ); if ( waitTime < 100 ) waitTime = 100; if ( waitTime > 5000 ) waitTime = 5000; } @@ -601,13 +600,14 @@ static void* uplink_mainloop(void *data) timing_set( &nextAltCheck, &now, altCheckInterval ); } } else if ( rttTestResult == RTT_NOT_REACHABLE ) { - atomic_compare_exchange_strong( &uplink->rttTestResult, &rttTestResult, RTT_IDLE ); - discoverFailCount++; - if ( uplink->current.fd == -1 && discoverFailCount > (SERVER_RTT_MAX_UNREACH / 2) ) { - logadd( LOG_DEBUG1, "Disabling %s:%d since no uplink is available", uplink->image->name, (int)uplink->image->rid ); - uplink->image->working = false; + if ( atomic_compare_exchange_strong( &uplink->rttTestResult, &rttTestResult, RTT_IDLE ) ) { + discoverFailCount++; + if ( uplink->current.fd == -1 && discoverFailCount > (SERVER_RTT_MAX_UNREACH / 2) ) { + logadd( LOG_DEBUG1, "Disabling %s:%d since no uplink is available", uplink->image->name, (int)uplink->image->rid ); + uplink->image->working = false; + } } - timing_set( &nextAltCheck, &now, (discoverFailCount < SERVER_RTT_MAX_UNREACH ? altCheckInterval : SERVER_RTT_INTERVAL_FAILED) ); + timing_set( &nextAltCheck, &now, (discoverFailCount < SERVER_RTT_MAX_UNREACH) ? altCheckInterval : SERVER_RTT_INTERVAL_FAILED ); } #ifdef _DEBUG if ( uplink->current.fd != -1 && !uplink->shutdown ) { diff --git a/src/shared/timing.h b/src/shared/timing.h index f3d8802..f23bfeb 100644 --- a/src/shared/timing.h +++ b/src/shared/timing.h @@ -22,7 +22,7 @@ extern struct timespec basetime; /** * Assign src to dst while adding secs seconds. */ -#define timing_set(dst,src,secs) do { (dst)->tv_sec = (src)->tv_sec + secs; (dst)->tv_nsec = (src)->tv_nsec; } while (0) +#define timing_set(dst,src,secs) do { (dst)->tv_sec = (src)->tv_sec + (secs); (dst)->tv_nsec = (src)->tv_nsec; } while (0) /** * Define variable now, initialize to timing_get. -- cgit v1.2.3-55-g7522 From 0edf0a0888b1e40769e19eee031c2cefdcf37d26 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 2 Sep 2019 13:26:47 +0200 Subject: [SERVER] Fix compiler warnings --- src/server/altservers.c | 4 ++-- src/server/reference.h | 4 ++-- src/shared/protocol.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shared') diff --git a/src/server/altservers.c b/src/server/altservers.c index ff3c95b..9e30cd0 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -140,7 +140,7 @@ int altservers_getListForClient(dnbd3_host_t *host, dnbd3_server_entry_t *output if ( altServers[i].host.type == 0 || altServers[i].isPrivate ) continue; // Slot is empty or uplink is for replication only if ( host->type == altServers[i].host.type ) { - scores[i] = 10 + altservers_netCloseness( host, &altServers[i].host ); + scores[i] = (uint16_t)( 10 + altservers_netCloseness( host, &altServers[i].host ) ); } else { scores[i] = 1; // Wrong address family } @@ -400,7 +400,7 @@ const dnbd3_host_t* altservers_indexToHost(int server) static void altservers_findUplinkInternal(dnbd3_uplink_t *uplink) { const int ALTS = 4; - int ret, itAlt, numAlts, current; + int itAlt, numAlts, current; bool panic; int servers[ALTS + 1]; struct timespec start, end; diff --git a/src/server/reference.h b/src/server/reference.h index 2a80955..4eda546 100644 --- a/src/server/reference.h +++ b/src/server/reference.h @@ -46,12 +46,12 @@ static inline void ref_put( ref *ref ) } } -#define ref_get_uplink(wr) ({ \ +#define ref_get_uplink(wr) __extension__({ \ ref* ref = ref_get( wr ); \ ref == NULL ? NULL : container_of(ref, dnbd3_uplink_t, reference); \ }) -#define ref_get_cachemap(image) ({ \ +#define ref_get_cachemap(image) __extension__({ \ ref* ref = ref_get( &(image)->ref_cacheMap ); \ ref == NULL ? NULL : container_of(ref, dnbd3_cache_map_t, reference); \ }) diff --git a/src/shared/protocol.h b/src/shared/protocol.h index 92dbe11..2b21c21 100644 --- a/src/shared/protocol.h +++ b/src/shared/protocol.h @@ -20,7 +20,7 @@ #define COND_HOPCOUNT(vers,hopcount) ( (vers) >= 3 ? (hopcount) : 0 ) // 2017-11-02: Macro to set flags in select image message properly if we're a server, as BG_REP depends on global var -#define SI_SERVER_FLAGS ( (_pretendClient ? 0 : FLAGS8_SERVER) | (_backgroundReplication == BGR_FULL ? FLAGS8_BG_REP : 0) ) +#define SI_SERVER_FLAGS ( (uint8_t)( (_pretendClient ? 0 : FLAGS8_SERVER) | (_backgroundReplication == BGR_FULL ? FLAGS8_BG_REP : 0) ) ) #define REPLY_OK (0) #define REPLY_ERRNO (-1) -- cgit v1.2.3-55-g7522 From 53fbcc89f027992e29c96086dd32eb624e181eac Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 17 Sep 2019 14:56:03 +0200 Subject: [*] Fix/simplify checks for linux --- src/server/integrity.c | 4 ++-- src/shared/fdsignal.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shared') diff --git a/src/server/integrity.c b/src/server/integrity.c index 2058104..1fbd9dc 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -136,7 +136,7 @@ static void* integrity_main(void * data UNUSED) int i; setThreadName( "image-check" ); blockNoncriticalSignals(); -#if defined(linux) || defined(__linux) +#if defined(__linux__) // Setting nice of this thread - this is not POSIX conforming, so check if other platforms support this. // POSIX says that setpriority() should set the nice value of all threads belonging to the current process, // but on linux you can do this per thread. @@ -291,7 +291,7 @@ static void flushFileRange(dnbd3_image_t *image, uint64_t start, uint64_t end) } if ( flushFd == -1 ) return; -#if defined(linux) || defined(__linux) +#if defined(__linux__) while ( sync_file_range( flushFd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) #else while ( fsync( flushFd ) == -1 ) // TODO: fdatasync() should be available since FreeBSD 12.0 ... Might be a tad bit faster diff --git a/src/shared/fdsignal.c b/src/shared/fdsignal.c index 5e5cf7f..087b6f1 100644 --- a/src/shared/fdsignal.c +++ b/src/shared/fdsignal.c @@ -1,6 +1,6 @@ #include "fdsignal.h" -#if defined(linux) || defined(__linux) || defined(__linux__) +#if defined(__linux__) //#warning "Using eventfd based signalling" #include "fdsignal.inc/eventfd.c" #elif __SIZEOF_INT__ == 4 && __SIZEOF_POINTER__ == 8 -- cgit v1.2.3-55-g7522 From 3d2f1f605e07b511c4ebf79c936c7061dd918957 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 19 Mar 2020 20:43:15 +0100 Subject: [SERVER] Use PCLMUL for crc32 on AMD64 if available This is about 16x as fast as before with the lookup table for processing 4 bytes at a time and should work on any AMD64 CPU made in the last decade. We still need an AltiVec implementation for G5 though. --- src/shared/crc32.c | 221 +++++++++++++++++++++++++++++++++++++++++------------ src/types.h | 12 +-- 2 files changed, 178 insertions(+), 55 deletions(-) (limited to 'src/shared') diff --git a/src/shared/crc32.c b/src/shared/crc32.c index db941d3..50f476a 100644 --- a/src/shared/crc32.c +++ b/src/shared/crc32.c @@ -41,21 +41,20 @@ #include "../types.h" #include -#define FAR +#if defined(__x86_64__) || defined(__amd64__) +#include +#include +#include +#include +#define zalign(n) __attribute__((aligned(n))) +#endif + #define OF(args) args -#define local static /* Definitions for doing the crc four data bytes at a time. */ -#if !defined(NOBYFOUR) -# define BYFOUR -#endif -#ifdef BYFOUR -# define TBLS 8 -#else -# define TBLS 1 -#endif /* BYFOUR */ +#define TBLS 8 -local const uint32_t crc_table[TBLS][256] = +static const uint32_t crc_table[TBLS][256] = { { 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, @@ -110,7 +109,6 @@ local const uint32_t crc_table[TBLS][256] = 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU -#ifdef BYFOUR }, { 0x00000000U, 0x191b3141U, 0x32366282U, 0x2b2d53c3U, 0x646cc504U, @@ -489,38 +487,159 @@ local const uint32_t crc_table[TBLS][256] = 0x95e6b8b1U, 0x7b490da3U, 0x1e2eb11bU, 0x483ed243U, 0x2d596efbU, 0xc3f6dbe9U, 0xa6916751U, 0x1fa9b0ccU, 0x7ace0c74U, 0x9461b966U, 0xf10605deU -#endif } }; -#ifdef NO_ENDIAN -// Currently not in use, always use the BYFOUR method with known endianness -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 +#define PCLMUL_MIN_LEN 64 +#define PCLMUL_ALIGN 16 +#define PCLMUL_ALIGN_MASK 15 -/* ========================================================================= */ -uint32_t crc32(crc, buf, len) - uint32_t crc; - const uint8_t *buf; - size_t len; +#if defined(__x86_64__) || defined(__amd64__) +/* crc32_simd.c + * + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + * + * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer + * length must be at least 64, and a multiple of 16. Based on: + * + * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" + * V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 + */ +static uint32_t +__attribute__((target("pclmul"))) +crc32pclmul(uint32_t crc, const uint8_t *buf, size_t len) { - if (buf == NULL) return 0; + /* + * Definitions of the bit-reflected domain constants k1,k2,k3, etc and + * the CRC32+Barrett polynomials given at the end of the paper. + */ + static const uint64_t zalign(16) k1k2[] = { 0x0154442bd4, 0x01c6e41596 }; + static const uint64_t zalign(16) k3k4[] = { 0x01751997d0, 0x00ccaa009e }; + static const uint64_t zalign(16) k5k0[] = { 0x0163cd6124, 0x0000000000 }; + static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 }; + + __m128i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8; + + /* + * There's at least one block of 64. + */ + x1 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + x2 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + x3 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + x4 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, _mm_cvtsi32_si128(crc)); + + x0 = _mm_load_si128((__m128i *)k1k2); + + buf += 64; + len -= 64; + + /* + * Parallel fold blocks of 64, if any. + */ + while (len >= 64) + { + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x6 = _mm_clmulepi64_si128(x2, x0, 0x00); + x7 = _mm_clmulepi64_si128(x3, x0, 0x00); + x8 = _mm_clmulepi64_si128(x4, x0, 0x00); + + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x2 = _mm_clmulepi64_si128(x2, x0, 0x11); + x3 = _mm_clmulepi64_si128(x3, x0, 0x11); + x4 = _mm_clmulepi64_si128(x4, x0, 0x11); + + y5 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + y6 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + y7 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + y8 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_xor_si128(x2, x6); + x3 = _mm_xor_si128(x3, x7); + x4 = _mm_xor_si128(x4, x8); + + x1 = _mm_xor_si128(x1, y5); + x2 = _mm_xor_si128(x2, y6); + x3 = _mm_xor_si128(x3, y7); + x4 = _mm_xor_si128(x4, y8); - crc = crc ^ 0xffffffffU; - while (len >= 8) { - DO8; - len -= 8; + buf += 64; + len -= 64; } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffU; + + /* + * Fold into 128-bits. + */ + x0 = _mm_load_si128((__m128i *)k3k4); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x3); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x4); + x1 = _mm_xor_si128(x1, x5); + + /* + * Single fold blocks of 16, if any. + */ + while (len >= 16) + { + x2 = _mm_loadu_si128((__m128i *)buf); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + buf += 16; + len -= 16; + } + + /* + * Fold 128-bits to 64-bits. + */ + x2 = _mm_clmulepi64_si128(x1, x0, 0x10); + x3 = _mm_setr_epi32(~0, 0, ~0, 0); + x1 = _mm_srli_si128(x1, 8); + x1 = _mm_xor_si128(x1, x2); + + x0 = _mm_loadl_epi64((__m128i*)k5k0); + + x2 = _mm_srli_si128(x1, 4); + x1 = _mm_and_si128(x1, x3); + x1 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Barret reduce to 32-bits. + */ + x0 = _mm_load_si128((__m128i*)poly); + + x2 = _mm_and_si128(x1, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x10); + x2 = _mm_and_si128(x2, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Return the crc32. + */ + return _mm_extract_epi32(x1, 1); } #endif -#ifdef BYFOUR - /* This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit integer pointer type. This violates the strict aliasing rule, where a @@ -533,7 +652,7 @@ uint32_t crc32(crc, buf, len) writes to the buffer that is passed to these routines. */ -#ifdef LITTLE_ENDIAN +#ifdef DNBD3_LITTLE_ENDIAN /* ========================================================================= */ #define DOLIT4 c ^= *buf4++; \ c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ @@ -547,16 +666,25 @@ uint32_t crc32(crc, buf, len) size_t len; { if (buf == NULL) return 0; - register uint32_t c; - register const uint32_t FAR *buf4; + uint32_t c; c = ~crc; - while (len && ((uintptr_t)buf & 3)) { + while (len && ((uintptr_t)buf & PCLMUL_ALIGN_MASK)) { c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); len--; } - - buf4 = (const uint32_t FAR *)(const void FAR *)buf; +#if defined(__x86_64__) || defined(__amd64__) + static atomic_int pclmul = -1; + if (pclmul == -1) { + pclmul = __builtin_cpu_supports("pclmul"); + } + if (pclmul && len >= PCLMUL_MIN_LEN) { + c = crc32pclmul(c, buf, len & ~PCLMUL_ALIGN_MASK); + buf += len & ~PCLMUL_ALIGN_MASK; + len &= PCLMUL_ALIGN_MASK; + } +#else + const uint32_t *buf4 = (const uint32_t *)(const void *)buf; while (len >= 32) { DOLIT32; len -= 32; @@ -565,7 +693,8 @@ uint32_t crc32(crc, buf, len) DOLIT4; len -= 4; } - buf = (const uint8_t FAR *)buf4; + buf = (const uint8_t *)buf4; +#endif if (len) do { c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); @@ -575,7 +704,7 @@ uint32_t crc32(crc, buf, len) } #endif -#ifdef BIG_ENDIAN +#ifdef DNBD3_BIG_ENDIAN /* ========================================================================= */ #define DOBIG4 c ^= *buf4++; \ c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ @@ -590,7 +719,7 @@ uint32_t crc32(crc, buf, len) { if (buf == NULL) return 0; register uint32_t c; - register const uint32_t FAR *buf4; + register const uint32_t *buf4; c = ~net_order_32(crc); while (len && ((uintptr_t)buf & 3)) { @@ -598,7 +727,7 @@ uint32_t crc32(crc, buf, len) len--; } - buf4 = (const uint32_t FAR *)(const void FAR *)buf; + buf4 = (const uint32_t *)(const void *)buf; while (len >= 32) { DOBIG32; len -= 32; @@ -607,7 +736,7 @@ uint32_t crc32(crc, buf, len) DOBIG4; len -= 4; } - buf = (const uint8_t FAR *)buf4; + buf = (const uint8_t *)buf4; if (len) do { c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); @@ -617,5 +746,3 @@ uint32_t crc32(crc, buf, len) } #endif -#endif /* BYFOUR */ - diff --git a/src/types.h b/src/types.h index dc8e501..83416f4 100644 --- a/src/types.h +++ b/src/types.h @@ -95,9 +95,7 @@ (a).size = net_order_32((a).size); \ } while (0) #define ENDIAN_MODE "Big Endian" -#ifndef BIG_ENDIAN -#define BIG_ENDIAN -#endif +#define DNBD3_BIG_ENDIAN #elif defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__i386__) || defined(__i386) || defined(__x86_64) #define dnbd3_packet_magic ((uint16_t)( (0x73) | (0x72 << 8) )) // Make little endian our network byte order as probably 99.999% of machines this will be used on are LE @@ -107,9 +105,7 @@ #define fixup_request(a) while(0) #define fixup_reply(a) while(0) #define ENDIAN_MODE "Little Endian" -#ifndef LITTLE_ENDIAN -#define LITTLE_ENDIAN -#endif +#define DNBD3_LITTLE_ENDIAN #else #error "Unknown Endianness" #endif @@ -156,10 +152,10 @@ typedef struct __attribute__((packed)) uint32_t size; // 4byte union { struct { -#ifdef LITTLE_ENDIAN +#ifdef DNBD3_LITTLE_ENDIAN uint64_t offset_small:56; // 7byte uint8_t hops; // 1byte -#elif defined(BIG_ENDIAN) +#elif defined(DNBD3_BIG_ENDIAN) uint8_t hops; // 1byte uint64_t offset_small:56; // 7byte #endif -- cgit v1.2.3-55-g7522 From 0f47d29912b0e3d0e387db715a16b7b4f273f389 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 19 Mar 2020 21:15:42 +0100 Subject: [SERVER] crc32: Fix compile with optimizations Should have tested in "Release" mode I guess. Seems we're at about 24x performance this way, so hooray. --- src/shared/crc32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shared') diff --git a/src/shared/crc32.c b/src/shared/crc32.c index 50f476a..098615f 100644 --- a/src/shared/crc32.c +++ b/src/shared/crc32.c @@ -508,7 +508,7 @@ static const uint32_t crc_table[TBLS][256] = * V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 */ static uint32_t -__attribute__((target("pclmul"))) +__attribute__((target("pclmul,sse4.1"))) crc32pclmul(uint32_t crc, const uint8_t *buf, size_t len) { /* @@ -676,7 +676,7 @@ uint32_t crc32(crc, buf, len) #if defined(__x86_64__) || defined(__amd64__) static atomic_int pclmul = -1; if (pclmul == -1) { - pclmul = __builtin_cpu_supports("pclmul"); + pclmul = __builtin_cpu_supports("pclmul") && __builtin_cpu_supports("sse4.1"); } if (pclmul && len >= PCLMUL_MIN_LEN) { c = crc32pclmul(c, buf, len & ~PCLMUL_ALIGN_MASK); -- cgit v1.2.3-55-g7522 From be628c705594a36f6aa649613ddf6c86039192a1 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 19 Mar 2020 21:45:12 +0100 Subject: [SHARED] crc32: Don't skip table lookup if PCLMUL is unavailable --- src/shared/crc32.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'src/shared') diff --git a/src/shared/crc32.c b/src/shared/crc32.c index 098615f..c3e566f 100644 --- a/src/shared/crc32.c +++ b/src/shared/crc32.c @@ -682,19 +682,20 @@ uint32_t crc32(crc, buf, len) c = crc32pclmul(c, buf, len & ~PCLMUL_ALIGN_MASK); buf += len & ~PCLMUL_ALIGN_MASK; len &= PCLMUL_ALIGN_MASK; - } -#else - const uint32_t *buf4 = (const uint32_t *)(const void *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const uint8_t *)buf4; + } else #endif + do { + const uint32_t *buf4 = (const uint32_t *)(const void *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const uint8_t *)buf4; + } while (0); if (len) do { c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); -- cgit v1.2.3-55-g7522 From 431ddd8bfb78a20f7d2739c95aefb1402c228091 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 23 Mar 2020 10:53:27 +0100 Subject: [SHARED] connect_ms might change above, don't check --- src/shared/sockhelper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shared') diff --git a/src/shared/sockhelper.c b/src/shared/sockhelper.c index ec80659..9e9109c 100644 --- a/src/shared/sockhelper.c +++ b/src/shared/sockhelper.c @@ -346,7 +346,7 @@ int sock_multiConnect(poll_list_t* list, const dnbd3_host_t* host, int connect_m if ( i != list->count ) list->entry[i] = list->entry[list->count]; if ( fd != -1 ) { sock_set_block( fd ); - if ( rw_ms != -1 && rw_ms != connect_ms ) { + if ( rw_ms != -1 ) { sock_setTimeout( fd, rw_ms ); } return fd; -- cgit v1.2.3-55-g7522 From dcece877215a0d909553ae9301a02d031b37b715 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 30 Jun 2020 11:21:31 +0200 Subject: [SHARED] Fix 16 byte information leakage in select image message --- src/shared/protocol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shared') diff --git a/src/shared/protocol.h b/src/shared/protocol.h index 2b21c21..05fd2bf 100644 --- a/src/shared/protocol.h +++ b/src/shared/protocol.h @@ -69,10 +69,8 @@ static inline bool dnbd3_select_image(int sock, const char *name, uint16_t rid, request.magic = dnbd3_packet_magic; request.cmd = CMD_SELECT_IMAGE; request.size = (uint32_t)len; -#ifdef _DEBUG request.handle = 0; request.offset = 0; -#endif fixup_request( request ); iov[0].iov_base = &request; iov[0].iov_len = sizeof(request); -- cgit v1.2.3-55-g7522