diff options
author | Michael Brown | 2012-06-29 15:30:18 +0200 |
---|---|---|
committer | Michael Brown | 2012-06-29 16:05:33 +0200 |
commit | ea61075c60e6417203bbb5fd54e1f313c99c164c (patch) | |
tree | 2bae13fae87f6a0fadf2d1a1ca39734db8149d87 /src/net/tcp.c | |
parent | [undi] Align the received frame payload for faster processing (diff) | |
download | ipxe-ea61075c60e6417203bbb5fd54e1f313c99c164c.tar.gz ipxe-ea61075c60e6417203bbb5fd54e1f313c99c164c.tar.xz ipxe-ea61075c60e6417203bbb5fd54e1f313c99c164c.zip |
[tcp] Add support for TCP window scaling
The maximum unscaled TCP window (64kB) implies a maximum bandwidth of
around 300kB/s on a WAN link with an RTT of 200ms, since at most one
window of data can be in flight per round trip (64kB / 0.2s). Add
support for the TCP window scaling option to remove this upper limit.
Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src/net/tcp.c')
-rw-r--r-- | src/net/tcp.c | 31 |
1 file changed, 29 insertions, 2 deletions
diff --git a/src/net/tcp.c b/src/net/tcp.c index c6bb4a66..b30fb1b0 100644 --- a/src/net/tcp.c +++ b/src/net/tcp.c @@ -87,6 +87,16 @@ struct tcp_connection { * Equivalent to TS.Recent in RFC 1323 terminology. */ uint32_t ts_recent; + /** Send window scale + * + * Equivalent to Snd.Wind.Scale in RFC 1323 terminology + */ + uint8_t snd_win_scale; + /** Receive window scale + * + * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology + */ + uint8_t rcv_win_scale; /** Transmit queue */ struct list_head tx_queue; @@ -490,6 +500,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) { struct io_buffer *iobuf; struct tcp_header *tcphdr; struct tcp_mss_option *mssopt; + struct tcp_window_scale_padded_option *wsopt; struct tcp_timestamp_padded_option *tsopt; void *payload; unsigned int flags; @@ -497,6 +508,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) { uint32_t seq_len; uint32_t app_win; uint32_t max_rcv_win; + uint32_t max_representable_win; int rc; /* If retransmission timer is already running, do nothing */ @@ -551,6 +563,9 @@ static int tcp_xmit ( struct tcp_connection *tcp ) { app_win = xfer_window ( &tcp->xfer ); if ( max_rcv_win > app_win ) max_rcv_win = app_win; + max_representable_win = ( 0xffff << tcp->rcv_win_scale ); + if ( max_rcv_win > max_representable_win ) + max_rcv_win = max_representable_win; max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ if ( tcp->rcv_win < max_rcv_win ) tcp->rcv_win = max_rcv_win; @@ -562,6 +577,11 @@ static int tcp_xmit ( struct tcp_connection *tcp ) { mssopt->kind = TCP_OPTION_MSS; mssopt->length = sizeof ( *mssopt ); mssopt->mss = htons ( TCP_MSS ); + wsopt = iob_push ( iobuf, sizeof ( *wsopt ) ); + wsopt->nop = TCP_OPTION_NOP; + wsopt->wsopt.kind = TCP_OPTION_WS; + wsopt->wsopt.length = sizeof ( wsopt->wsopt ); + wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE; } if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) { tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); @@ -581,7 +601,7 @@ static int tcp_xmit ( 
struct tcp_connection *tcp ) { tcphdr->ack = htonl ( tcp->rcv_ack ); tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); tcphdr->flags = flags; - tcphdr->win = htons ( tcp->rcv_win ); + tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale ); tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); /* Dump header */ @@ -769,6 +789,9 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, case TCP_OPTION_MSS: options->mssopt = data; break; + case TCP_OPTION_WS: + options->wsopt = data; + break; case TCP_OPTION_TS: options->tsopt = data; break; @@ -825,6 +848,10 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, tcp->rcv_ack = seq; if ( options->tsopt ) tcp->flags |= TCP_TS_ENABLED; + if ( options->wsopt ) { + tcp->snd_win_scale = options->wsopt->scale; + tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE; + } } /* Ignore duplicate SYN */ @@ -1168,7 +1195,7 @@ static int tcp_rx ( struct io_buffer *iobuf, tcp = tcp_demux ( ntohs ( tcphdr->dest ) ); seq = ntohl ( tcphdr->seq ); ack = ntohl ( tcphdr->ack ); - win = ntohs ( tcphdr->win ); + win = ( ntohs ( tcphdr->win ) << tcp->snd_win_scale ); flags = tcphdr->flags; tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), ( hlen - sizeof ( *tcphdr ) ), &options ); |