summaryrefslogtreecommitdiffstats
path: root/src/net/tcp.c
diff options
context:
space:
mode:
authorMichael Brown2014-05-07 03:31:48 +0200
committerMichael Brown2014-05-12 18:19:26 +0200
commitd28bb51f445250d7bd7ccfb5b53f2ffa985e1fba (patch)
tree67370935765c325819db66f382a7ce8492d5fbd6 /src/net/tcp.c
parent[nfs] Fix an invalid free() when loading a symlink (diff)
downloadipxe-d28bb51f445250d7bd7ccfb5b53f2ffa985e1fba.tar.gz
ipxe-d28bb51f445250d7bd7ccfb5b53f2ffa985e1fba.tar.xz
ipxe-d28bb51f445250d7bd7ccfb5b53f2ffa985e1fba.zip
[tcp] Defer sending ACKs until all received packets have been processed
When running inside a virtual machine (or when using the UNDI driver), transmitting packets can be expensive. When we receive several packets in one poll (e.g. because a slow BIOS timer interrupt routine has caused us to fall behind in processing), we can safely send just a single ACK to cover all of the received packets. This reduces the time spent transmitting and allows us to clear the backlog much faster. Various RFCs (starting with RFC1122) state that there should be an ACK for at least every second segment. We choose not to enforce this rule. Under normal operation each poll should find at most one received packet, and we will then not delay any ACKs. We delay (i.e. omit) ACKs only when under sufficiently heavy load that we are finding multiple packets per poll; under these conditions it is important to clear the backlog quickly since any delay may lead to dropped packets. Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src/net/tcp.c')
-rw-r--r--src/net/tcp.c33
1 files changed, 25 insertions, 8 deletions
diff --git a/src/net/tcp.c b/src/net/tcp.c
index 1c48769a..987cb63e 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -16,6 +16,7 @@
#include <ipxe/uri.h>
#include <ipxe/netdevice.h>
#include <ipxe/profile.h>
+#include <ipxe/process.h>
#include <ipxe/tcpip.h>
#include <ipxe/tcp.h>
@@ -107,6 +108,8 @@ struct tcp_connection {
struct list_head tx_queue;
/** Receive queue */
struct list_head rx_queue;
+ /** Transmission process */
+ struct process process;
/** Retransmission timer */
struct retry_timer timer;
/** Shutdown (TIME_WAIT) timer */
@@ -166,6 +169,7 @@ static struct profiler tcp_rx_profiler __profiler = { .name = "tcp.rx" };
static struct profiler tcp_xfer_profiler __profiler = { .name = "tcp.xfer" };
/* Forward declarations */
+static struct process_descriptor tcp_process_desc;
static struct interface_descriptor tcp_xfer_desc;
static void tcp_expired ( struct retry_timer *timer, int over );
static void tcp_wait_expired ( struct retry_timer *timer, int over );
@@ -273,6 +277,7 @@ static int tcp_open ( struct interface *xfer, struct sockaddr *peer,
DBGC ( tcp, "TCP %p allocated\n", tcp );
ref_init ( &tcp->refcnt, NULL );
intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt );
+ process_init_stopped ( &tcp->process, &tcp_process_desc, &tcp->refcnt );
timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt );
timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt );
tcp->prev_tcp_state = TCP_CLOSED;
@@ -369,6 +374,7 @@ static void tcp_close ( struct tcp_connection *tcp, int rc ) {
pending_put ( &tcp->pending_flags );
/* Remove from list and drop reference */
+ process_del ( &tcp->process );
stop_timer ( &tcp->timer );
stop_timer ( &tcp->wait );
list_del ( &tcp->list );
@@ -497,7 +503,7 @@ static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
* will have been started if necessary, and so the stack will
* eventually attempt to retransmit the failed packet.
*/
-static int tcp_xmit ( struct tcp_connection *tcp ) {
+static void tcp_xmit ( struct tcp_connection *tcp ) {
struct io_buffer *iobuf;
struct tcp_header *tcphdr;
struct tcp_mss_option *mssopt;
@@ -517,7 +523,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
/* If retransmission timer is already running, do nothing */
if ( timer_running ( &tcp->timer ) )
- return 0;
+ return;
/* Calculate both the actual (payload) and sequence space
* lengths that we wish to transmit.
@@ -537,7 +543,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
/* If we have nothing to transmit, stop now */
if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
- return 0;
+ return;
/* If we are transmitting anything that requires
* acknowledgement (i.e. consumes sequence space), start the
@@ -553,7 +559,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
"%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
tcp->rcv_ack );
- return -ENOMEM;
+ return;
}
iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
@@ -620,16 +626,19 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
tcp->rcv_ack, strerror ( rc ) );
- return rc;
+ return;
}
/* Clear ACK-pending flag */
tcp->flags &= ~TCP_ACK_PENDING;
profile_stop ( &tcp_tx_profiler );
- return 0;
}
+/** TCP process descriptor */
+static struct process_descriptor tcp_process_desc =
+ PROC_DESC_ONCE ( struct tcp_connection, process, tcp_xmit );
+
/**
* Retransmission timer expired
*
@@ -1272,8 +1281,16 @@ static int tcp_rx ( struct io_buffer *iobuf,
/* Dump out any state change as a result of the received packet */
tcp_dump_state ( tcp );
- /* Send out any pending data */
- tcp_xmit ( tcp );
+ /* Schedule transmission of ACK (and any pending data). If we
+ * have received any out-of-order packets (i.e. if the receive
+ * queue remains non-empty after processing) then send the ACK
+ * immediately in order to trigger Fast Retransmission.
+ */
+ if ( list_empty ( &tcp->rx_queue ) ) {
+ process_add ( &tcp->process );
+ } else {
+ tcp_xmit ( tcp );
+ }
/* If this packet was the last we expect to receive, set up
* timer to expire and cause the connection to be freed.