summaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs/ip_vs_core.c
diff options
context:
space:
mode:
authorVenkata Mohan Reddy2010-02-18 12:31:05 +0100
committerPatrick McHardy2010-02-18 12:31:05 +0100
commit2906f66a5682e5670a5eefe991843689b8d8563f (patch)
treedcbc3ef175c609d2fa32a8e7478eb0d17015c0a7 /net/netfilter/ipvs/ip_vs_core.c
parentMerge branch 'ebt_config_compat_v4' of git://git.breakpoint.cc/fw/nf-next-2.6 (diff)
downloadkernel-qcow2-linux-2906f66a5682e5670a5eefe991843689b8d8563f.tar.gz
kernel-qcow2-linux-2906f66a5682e5670a5eefe991843689b8d8563f.tar.xz
kernel-qcow2-linux-2906f66a5682e5670a5eefe991843689b8d8563f.zip
ipvs: SCTP Trasport Loadbalancing Support
Enhance IPVS to load balance SCTP transport protocol packets. This is done based on the SCTP rfc 4960. All possible control chunks have been taken care. The state machine used in this code looks some what lengthy. I tried to make the state machine easy to understand. Signed-off-by: Venkata Mohan Reddy Koppula <mohanreddykv@gmail.com> Signed-off-by: Simon Horman <horms@verge.net.au> Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_core.c')
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c62
1 files changed, 53 insertions, 9 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 847ffca40184..72e96d823ebf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <linux/sctp.h>
#include <linux/icmp.h>
#include <net/ip.h>
@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
return "UDP";
case IPPROTO_TCP:
return "TCP";
+ case IPPROTO_SCTP:
+ return "SCTP";
case IPPROTO_ICMP:
return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
ip_send_check(ciph);
}
- /* the TCP/UDP port */
- if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+ /* the TCP/UDP/SCTP port */
+ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
+ IPPROTO_SCTP == ciph->protocol) {
__be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout)
@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
ciph->saddr = cp->daddr.in6;
}
- /* the TCP/UDP port */
- if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+ /* the TCP/UDP/SCTP port */
+ if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
+ IPPROTO_SCTP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
if (inout)
@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
goto out;
}
- if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+ if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+ IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
}
#endif
+/*
+ * Check if sctp chunc is ABORT chunk
+ */
+static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
+{
+ sctp_chunkhdr_t *sch, schunk;
+ sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return 0;
+ if (sch->type == SCTP_CID_ABORT)
+ return 1;
+ return 0;
+}
+
static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
- pp->protocol == IPPROTO_UDP)) {
+ pp->protocol == IPPROTO_UDP ||
+ pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph.len,
@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
* existing entry if it is not RST
* packet or not TCP packet.
*/
- if (iph.protocol != IPPROTO_TCP
- || !is_tcp_reset(skb, iph.len)) {
+ if ((iph.protocol != IPPROTO_TCP &&
+ iph.protocol != IPPROTO_SCTP)
+ || ((iph.protocol == IPPROTO_TCP
+ && !is_tcp_reset(skb, iph.len))
+ || (iph.protocol == IPPROTO_SCTP
+ && !is_sctp_abort(skb,
+ iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
icmpv6_send(skb,
@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+ if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
+ IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */
@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encorage the standby servers to update the connections timeout
*/
pkts = atomic_add_return(1, &cp->in_pkts);
+ if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ cp->protocol == IPPROTO_SCTP) {
+ if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
+ (atomic_read(&cp->in_pkts) %
+ sysctl_ip_vs_sync_threshold[1]
+ == sysctl_ip_vs_sync_threshold[0])) ||
+ (cp->old_state != cp->state &&
+ ((cp->state == IP_VS_SCTP_S_CLOSED) ||
+ (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
+ (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
+ ip_vs_sync_conn(cp);
+ goto out;
+ }
+ }
+
if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
+out:
cp->old_state = cp->state;
ip_vs_conn_put(cp);