summaryrefslogtreecommitdiffstats
path: root/src/arch
diff options
context:
space:
mode:
authorMichael Brown2016-05-10 18:13:05 +0200
committerMichael Brown2016-05-11 09:16:36 +0200
commit47931a4de53ccdeda061c59aa0919f152cf0dfdf (patch)
tree70c3de07d8027780593203865ca32aff79aca90b /src/arch
parent[arm] Add optimised string functions for 64-bit ARM (diff)
downloadipxe-47931a4de53ccdeda061c59aa0919f152cf0dfdf.tar.gz
ipxe-47931a4de53ccdeda061c59aa0919f152cf0dfdf.tar.xz
ipxe-47931a4de53ccdeda061c59aa0919f152cf0dfdf.zip
[arm] Add optimised TCP/IP checksumming for 64-bit ARM
Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src/arch')
-rw-r--r--src/arch/arm32/include/bits/tcpip.h (renamed from src/arch/arm/include/bits/tcpip.h)0
-rw-r--r--src/arch/arm64/core/arm64_tcpip.c175
-rw-r--r--src/arch/arm64/include/bits/tcpip.h15
3 files changed, 190 insertions, 0 deletions
diff --git a/src/arch/arm/include/bits/tcpip.h b/src/arch/arm32/include/bits/tcpip.h
index fc3c5b3f..fc3c5b3f 100644
--- a/src/arch/arm/include/bits/tcpip.h
+++ b/src/arch/arm32/include/bits/tcpip.h
diff --git a/src/arch/arm64/core/arm64_tcpip.c b/src/arch/arm64/core/arm64_tcpip.c
new file mode 100644
index 00000000..0ef04ea4
--- /dev/null
+++ b/src/arch/arm64/core/arm64_tcpip.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** @file
+ *
+ * TCP/IP checksum
+ *
+ */
+
+#include <strings.h>
+#include <ipxe/tcpip.h>
+
+/** Alignment used by main checksumming loop */
+#define TCPIP_CHKSUM_ALIGN 16
+
+/** Number of steps in each iteration of the unrolled main checksumming loop */
+#define TCPIP_CHKSUM_UNROLL 4
+
+/**
+ * Calculate continued TCP/IP checkum
+ *
+ * @v sum Checksum of already-summed data, in network byte order
+ * @v data Data buffer
+ * @v len Length of data buffer
+ * @ret sum Updated checksum, in network byte order
+ */
+uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+ size_t len ) {
+ intptr_t start;
+ intptr_t end;
+ intptr_t mid;
+ unsigned int pre;
+ unsigned int post;
+ unsigned int first;
+ uint64_t discard_low;
+ uint64_t discard_high;
+
+ /* Avoid potentially undefined shift operation */
+ if ( len == 0 )
+ return sum;
+
+ /* Find maximally-aligned midpoint. For short blocks of data,
+ * this may be aligned to fewer than 16 bytes.
+ */
+ start = ( ( intptr_t ) data );
+ end = ( start + len );
+ mid = ( end &
+ ~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
+
+ /* Calculate pre- and post-alignment lengths */
+ pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+ post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+
+ /* Calculate number of steps in first iteration of unrolled loop */
+ first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
+ ( TCPIP_CHKSUM_UNROLL - 1 ) );
+
+ /* Calculate checksum */
+ __asm__ ( /* Invert sum */
+ "eor %w0, %w0, #0xffff\n\t"
+ /* Clear carry flag */
+ "cmn xzr, xzr\n\t"
+ /* Byteswap and sum pre-alignment byte, if applicable */
+ "tbz %w4, #0, 1f\n\t"
+ "ldrb %w2, [%1], #1\n\t"
+ "rev16 %w0, %w0\n\t"
+ "rev16 %w2, %w2\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum pre-alignment halfword, if applicable */
+ "tbz %w4, #1, 1f\n\t"
+ "ldrh %w2, [%1], #2\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum pre-alignment word, if applicable */
+ "tbz %w4, #2, 1f\n\t"
+ "ldr %w2, [%1], #4\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum pre-alignment doubleword, if applicable */
+ "tbz %w4, #3, 1f\n\t"
+ "ldr %2, [%1], #8\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Jump into unrolled (x4) main loop */
+ "adr %2, 2f\n\t"
+ "sub %2, %2, %5, lsl #3\n\t"
+ "sub %2, %2, %5, lsl #2\n\t"
+ "br %2\n\t"
+ "\n1:\n\t"
+ "ldp %2, %3, [%1], #16\n\t"
+ "adcs %0, %0, %2\n\t"
+ "adcs %0, %0, %3\n\t"
+ "ldp %2, %3, [%1], #16\n\t"
+ "adcs %0, %0, %2\n\t"
+ "adcs %0, %0, %3\n\t"
+ "ldp %2, %3, [%1], #16\n\t"
+ "adcs %0, %0, %2\n\t"
+ "adcs %0, %0, %3\n\t"
+ "ldp %2, %3, [%1], #16\n\t"
+ "adcs %0, %0, %2\n\t"
+ "adcs %0, %0, %3\n\t"
+ "\n2:\n\t"
+ "sub %2, %1, %6\n\t"
+ "cbnz %2, 1b\n\t"
+ /* Sum post-alignment doubleword, if applicable */
+ "tbz %w7, #3, 1f\n\t"
+ "ldr %2, [%1], #8\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum post-alignment word, if applicable */
+ "tbz %w7, #2, 1f\n\t"
+ "ldr %w2, [%1], #4\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum post-alignment halfword, if applicable */
+ "tbz %w7, #1, 1f\n\t"
+ "ldrh %w2, [%1], #2\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Sum post-alignment byte, if applicable */
+ "tbz %w7, #0, 1f\n\t"
+ "ldrb %w2, [%1], #1\n\t"
+ "adcs %0, %0, %2\n\t"
+ "\n1:\n\t"
+ /* Fold down to a uint32_t plus carry flag */
+ "lsr %2, %0, #32\n\t"
+ "adcs %w0, %w0, %w2\n\t"
+ /* Fold down to a uint16_t plus carry in bit 16 */
+ "ubfm %2, %0, #0, #15\n\t"
+ "ubfm %3, %0, #16, #31\n\t"
+ "adc %w0, %w2, %w3\n\t"
+ /* Fold down to a uint16_t */
+ "tbz %w0, #16, 1f\n\t"
+ "mov %w2, #0xffff\n\t"
+ "sub %w0, %w0, %w2\n\t"
+ "tbz %w0, #16, 1f\n\t"
+ "sub %w0, %w0, %w2\n\t"
+ "\n1:\n\t"
+ /* Byteswap back, if applicable */
+ "tbz %w4, #0, 1f\n\t"
+ "rev16 %w0, %w0\n\t"
+ "\n1:\n\t"
+ /* Invert sum */
+ "eor %w0, %w0, #0xffff\n\t"
+ : "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
+ "=&r" ( discard_high )
+ : "r" ( pre ), "r" ( first ), "r" ( end - post ),
+ "r" ( post )
+ : "cc" );
+
+ return sum;
+}
diff --git a/src/arch/arm64/include/bits/tcpip.h b/src/arch/arm64/include/bits/tcpip.h
new file mode 100644
index 00000000..68686534
--- /dev/null
+++ b/src/arch/arm64/include/bits/tcpip.h
@@ -0,0 +1,15 @@
+#ifndef _BITS_TCPIP_H
+#define _BITS_TCPIP_H
+
+/** @file
+ *
+ * Transport-network layer interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+ size_t len );
+
+#endif /* _BITS_TCPIP_H */