From de3b7a06dfe15bda3e66a52285d422b954bb4832 Mon Sep 17 00:00:00 2001
From: Steffen Klassert
Date: Thu, 4 Dec 2014 09:46:20 +0100
Subject: xfrm6: Fix transport header offset in _decode_session6.

skb->transport_header might not be valid when we do a reverse
decode because the ipv6 tunnel error handlers don't update it
to the inner transport header. This leads to a wrong offset
calculation and to wrong layer 4 informations. We fix this
by using the size of the ipv6 header as the first offset.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/xfrm6_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f983644373a..aa48302f00a1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -130,8 +130,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
 	struct flowi6 *fl6 = &fl->u.ip6;
 	int onlyproto = 0;
-	u16 offset = skb_network_header_len(skb);
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	u16 offset = sizeof(*hdr);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
-- 
cgit v1.2.3-55-g7522


From f855691975bb06373a98711e4cfe2c224244b536 Mon Sep 17 00:00:00 2001
From: Steffen Klassert
Date: Mon, 8 Dec 2014 07:56:18 +0100
Subject: xfrm6: Fix the nexthdr offset in _decode_session6.

xfrm_decode_session() was originally designed for the
usage in the receive path where the correct nexthdr offset
is stored in IP6CB(skb)->nhoff. Over time this function
spread to code that is used in the output path (netfilter,
vti) where IP6CB(skb)->nhoff is not set. As a result, we
get a wrong nexthdr and the upper layer flow informations
are wrong. This can leed to incorrect policy lookups.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/xfrm6_policy.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index aa48302f00a1..48bf5a06847b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -134,8 +134,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	u16 offset = sizeof(*hdr);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
-	u8 nexthdr = nh[IP6CB(skb)->nhoff];
+	u16 nhoff = IP6CB(skb)->nhoff;
 	int oif = 0;
+	u8 nexthdr;
+
+	if (!nhoff)
+		nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	nexthdr = nh[nhoff];
 
 	if (skb_dst(skb))
 		oif = skb_dst(skb)->dev->ifindex;
-- 
cgit v1.2.3-55-g7522


From 2af81d6718f5ec92b1d787e0fe79b0d3b6f78601 Mon Sep 17 00:00:00 2001
From: Luciano Coelho
Date: Wed, 21 Jan 2015 22:19:34 +0200
Subject: mac80211: only roll back station states for WDS when suspending

In normal cases (i.e. when we are fully associated), cfg80211 takes
care of removing all the stations before calling suspend in mac80211.

But in the corner case when we suspend during authentication or
association, mac80211 needs to roll back the station states.  But we
shouldn't roll back the station states in the suspend function,
because this is taken care of in other parts of the code, except for
WDS interfaces.  For AP types of interfaces, cfg80211 takes care of
disconnecting all stations before calling the driver's suspend code.
For station interfaces, this is done in the quiesce code.

For WDS interfaces we still need to do it here, so move the code into
a new switch case for WDS.

Cc: stable@kernel.org [3.15+]
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/pm.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 4c5192e0d66c..4a95fe3cffbc 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -86,20 +86,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 		}
 	}
 
-	/* tear down aggregation sessions and remove STAs */
-	mutex_lock(&local->sta_mtx);
-	list_for_each_entry(sta, &local->sta_list, list) {
-		if (sta->uploaded) {
-			enum ieee80211_sta_state state;
-
-			state = sta->sta_state;
-			for (; state > IEEE80211_STA_NOTEXIST; state--)
-				WARN_ON(drv_sta_state(local, sta->sdata, sta,
-						      state, state - 1));
-		}
-	}
-	mutex_unlock(&local->sta_mtx);
-
 	/* remove all interfaces that were created in the driver */
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
@@ -111,6 +97,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 		case NL80211_IFTYPE_STATION:
 			ieee80211_mgd_quiesce(sdata);
 			break;
+		case NL80211_IFTYPE_WDS:
+			/* tear down aggregation sessions and remove STAs */
+			mutex_lock(&local->sta_mtx);
+			sta = sdata->u.wds.sta;
+			if (sta && sta->uploaded) {
+				enum ieee80211_sta_state state;
+
+				state = sta->sta_state;
+				for (; state > IEEE80211_STA_NOTEXIST; state--)
+					WARN_ON(drv_sta_state(local, sta->sdata,
+							      sta, state,
+							      state - 1));
+			}
+			mutex_unlock(&local->sta_mtx);
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3-55-g7522


From fb142f4bbb7d718b3d9cc8f27c909b4809545f5c Mon Sep 17 00:00:00 2001
From: Fred Chou
Date: Tue, 20 Jan 2015 10:17:27 +0800
Subject: mac80211: correct header length calculation

HT Control field may also be present in management frames, as defined
in 8.2.4.1.10 of 802.11-2012. Account for this in calculation of header
length.

Signed-off-by: Fred Chou <fred.chou.nd@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index d0ac795445b7..5488c3662f7d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -308,6 +308,12 @@ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
 		goto out;
 	}
 
+	if (ieee80211_is_mgmt(fc)) {
+		if (ieee80211_has_order(fc))
+			hdrlen += IEEE80211_HT_CTL_LEN;
+		goto out;
+	}
+
 	if (ieee80211_is_ctl(fc)) {
 		/*
 		 * ACK and CTS are 10 bytes, all others 16. To see how
-- 
cgit v1.2.3-55-g7522


From 3a5c5e81d8128a9e43abc52b75dd21d3da7a0cfc Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef
Date: Tue, 20 Jan 2015 15:05:08 +0100
Subject: mac80211: properly set CCK flag in radiotap

Fix a regression introduced by commit a5e70697d0c4 ("mac80211: add radiotap flag
and handling for 5/10 MHz") where the IEEE80211_CHAN_CCK channel type flag was
incorrectly replaced by the IEEE80211_CHAN_OFDM flag. This commit fixes that by
using the CCK flag again.

Cc: stable@vger.kernel.org
Fixes: a5e70697d0c4 ("mac80211: add radiotap flag and handling for 5/10 MHz")
Signed-off-by: Mathy Vanhoef <vanhoefm@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 683b10f46505..d69ca513848e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -272,7 +272,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	else if (rate && rate->flags & IEEE80211_RATE_ERP_G)
 		channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
 	else if (rate)
-		channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
+		channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ;
 	else
 		channel_flags |= IEEE80211_CHAN_2GHZ;
 	put_unaligned_le16(channel_flags, pos);
-- 
cgit v1.2.3-55-g7522


From 0fa7b39131576dd1baa6ca17fca53c65d7f62249 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Fri, 23 Jan 2015 11:10:12 +0100
Subject: nl80211: fix per-station group key get/del and memory leak

In case userspace attempts to obtain key information for or delete a
unicast key, this is currently erroneously rejected unless the driver
sets the WIPHY_FLAG_IBSS_RSN flag. Apparently enough drivers do so it
was never noticed.

Fix that, and while at it fix a potential memory leak: the error path
in the get_key() function was placed after allocating a message but
didn't free it - move it to a better place. Luckily admin permissions
are needed to call this operation.

Cc: stable@vger.kernel.org
Fixes: e31b82136d1ad ("cfg80211/mac80211: allow per-station GTKs")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7ca4b5133123..8887c6e5fca8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2854,6 +2854,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->get_key)
 		return -EOPNOTSUPP;
 
+	if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
+		return -ENOENT;
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
@@ -2873,10 +2876,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr))
 		goto nla_put_failure;
 
-	if (pairwise && mac_addr &&
-	    !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
-		return -ENOENT;
-
 	err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie,
 			   get_key_callback);
 
@@ -3047,7 +3046,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
 	wdev_lock(dev->ieee80211_ptr);
 	err = nl80211_key_allowed(dev->ieee80211_ptr);
 
-	if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr &&
+	if (key.type == NL80211_KEYTYPE_GROUP && mac_addr &&
 	    !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
 		err = -ENOENT;
 
-- 
cgit v1.2.3-55-g7522


From 6b8d9117ccb4f81b1244aafa7bc70ef8fa45fc49 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Fri, 23 Jan 2015 20:47:00 -0500
Subject: net: llc: use correct size for sysctl timeout entries

The timeout entries are sizeof(int) rather than sizeof(long), which
means that when they were getting read we'd also leak kernel memory
to userspace along with the timeout values.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/sysctl_net_llc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 612a5ddaf93b..799bafc2af39 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -18,28 +18,28 @@ static struct ctl_table llc2_timeout_table[] = {
 	{
 		.procname	= "ack",
 		.data		= &sysctl_llc2_ack_timeout,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(sysctl_llc2_ack_timeout),
 		.mode		= 0644,
 		.proc_handler   = proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "busy",
 		.data		= &sysctl_llc2_busy_timeout,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(sysctl_llc2_busy_timeout),
 		.mode		= 0644,
 		.proc_handler   = proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "p",
 		.data		= &sysctl_llc2_p_timeout,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(sysctl_llc2_p_timeout),
 		.mode		= 0644,
 		.proc_handler   = proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "rej",
 		.data		= &sysctl_llc2_rej_timeout,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(sysctl_llc2_rej_timeout),
 		.mode		= 0644,
 		.proc_handler   = proc_dointvec_jiffies,
 	},
-- 
cgit v1.2.3-55-g7522


From 24df8986f36b9a5d8ae08236498d92267bac454b Mon Sep 17 00:00:00 2001
From: Vivien Didelot
Date: Tue, 20 Jan 2015 19:13:32 -0500
Subject: net: dsa: set slave MII bus PHY mask

When registering a mdio bus, Linux assumes than every port has a PHY and tries
to scan it. If a switch port has no PHY registered, DSA will fail to register
the slave MII bus. To fix this, set the slave MII bus PHY mask to the switch
PHYs mask.

As an example, if we use a Marvell MV88E6352 (which is a 7-port switch with no
registered PHYs for port 5 and port 6), with the following declared names:

	static struct dsa_chip_data switch_cdata = {
		[...]
		.port_names[0] = "sw0",
		.port_names[1] = "sw1",
		.port_names[2] = "sw2",
		.port_names[3] = "sw3",
		.port_names[4] = "sw4",
		.port_names[5] = "cpu",
	};

DSA will fail to create the switch instance. With the PHY mask set for the
slave MII bus, only the PHY for ports 0-4 will be scanned and the instance will
be successfully created.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 515569ffde8a..589aafd01fc5 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -46,6 +46,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
 	snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
 			ds->index, ds->pd->sw_addr);
 	ds->slave_mii_bus->parent = ds->master_dev;
+	ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
 }
 
 
-- 
cgit v1.2.3-55-g7522


From b0a1ba59921eaaa9cb8f97bb35f2e6870fcdfedc Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Tue, 20 Jan 2015 19:16:02 -0800
Subject: ipv6: Fix __ip6_route_redirect

In my last commit (a3c00e4: ipv6: Remove BACKTRACK macro), the changes in
__ip6_route_redirect is incorrect.  The following case is missed:
1. The for loop tries to find a valid gateway rt. If it fails to find
   one, rt will be NULL.
2. When rt is NULL, it is set to the ip6_null_entry.
3. The newly added 'else if', from a3c00e4, will stop the backtrack from
   happening.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 166e33bed222..495965358d22 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1242,12 +1242,16 @@ restart:
 		rt = net->ipv6.ip6_null_entry;
 	else if (rt->dst.error) {
 		rt = net->ipv6.ip6_null_entry;
-	} else if (rt == net->ipv6.ip6_null_entry) {
+		goto out;
+	}
+
+	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
 
+out:
 	dst_hold(&rt->dst);
 
 	read_unlock_bh(&table->tb6_lock);
-- 
cgit v1.2.3-55-g7522


From 7913ecf69e24bd7575e0d0325eda3b43c8cfa749 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 22 Jan 2015 10:41:01 +0100
Subject: net: cls_bpf: fix size mismatch on filter preparation

In cls_bpf_modify_existing(), we read out the number of filter blocks,
do some sanity checks, allocate a block on that size, and copy over the
BPF instruction blob from user space, then pass everything through the
classic BPF checker prior to installation of the classifier.

We should reject mismatches here, there are 2 scenarios: the number of
filter blocks could be smaller than the provided instruction blob, so
we do a partial copy of the BPF program, and thus the instructions will
either be rejected from the verifier or a valid BPF program will be run;
in the other case, we'll end up copying more than we're supposed to,
and most likely the trailing garbage will be rejected by the verifier
as well (i.e. we need to fit instruction pattern, ret {A,K} needs to be
last instruction, load/stores must be correct, etc); in case not, we
would leak memory when dumping back instruction patterns. The code should
have only used nla_len() as Dave noted to avoid this from the beginning.
Anyway, lets fix it by rejecting such load attempts.

Fixes: 7d1d65cb84e1 ("net: sched: cls_bpf: add BPF-based classifier")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_bpf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 84c8219c3e1c..49e5fa8795ae 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -180,6 +180,11 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	}
 
 	bpf_size = bpf_len * sizeof(*bpf_ops);
+	if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
+		ret = -EINVAL;
+		goto errout;
+	}
+
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
 	if (bpf_ops == NULL) {
 		ret = -ENOMEM;
-- 
cgit v1.2.3-55-g7522


From 3f2ab135946dcd4eb6af92a53d6d4bd35e7526ca Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 22 Jan 2015 10:41:02 +0100
Subject: net: cls_bpf: fix auto generation of per list handles

When creating a bpf classifier in tc with priority collisions and
invoking automatic unique handle assignment, cls_bpf_grab_new_handle()
will return a wrong handle id which in fact is non-unique. Usually
altering of specific filters is being addressed over major id, but
in case of collisions we result in a filter chain, where handle ids
address individual cls_bpf_progs inside the classifier.

Issue is, in cls_bpf_grab_new_handle() we probe for head->hgen handle
in cls_bpf_get() and in case we found a free handle, we're supposed
to use exactly head->hgen. In case of insufficient numbers of handles,
we bail out later as handle id 0 is not allowed.

Fixes: 7d1d65cb84e1 ("net: sched: cls_bpf: add BPF-based classifier")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_bpf.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 49e5fa8795ae..f59adf8a4cd7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -220,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
 				   struct cls_bpf_head *head)
 {
 	unsigned int i = 0x80000000;
+	u32 handle;
 
 	do {
 		if (++head->hgen == 0x7FFFFFFF)
 			head->hgen = 1;
 	} while (--i > 0 && cls_bpf_get(tp, head->hgen));
-	if (i == 0)
+
+	if (unlikely(i == 0)) {
 		pr_err("Insufficient number of handles\n");
+		handle = 0;
+	} else {
+		handle = head->hgen;
+	}
 
-	return i;
+	return handle;
 }
 
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
-- 
cgit v1.2.3-55-g7522


From 600ddd6825543962fb807884169e57b580dba208 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 22 Jan 2015 18:26:54 +0100
Subject: net: sctp: fix slab corruption from use after free on INIT collisions

When hitting an INIT collision case during the 4WHS with AUTH enabled, as
already described in detail in commit 1be9a950c646 ("net: sctp: inherit
auth_capable on INIT collisions"), it can happen that we occasionally
still remotely trigger the following panic on server side which seems to
have been uncovered after the fix from commit 1be9a950c646 ...

[  533.876389] BUG: unable to handle kernel paging request at 00000000ffffffff
[  533.913657] IP: [<ffffffff811ac385>] __kmalloc+0x95/0x230
[  533.940559] PGD 5030f2067 PUD 0
[  533.957104] Oops: 0000 [#1] SMP
[  533.974283] Modules linked in: sctp mlx4_en [...]
[  534.939704] Call Trace:
[  534.951833]  [<ffffffff81294e30>] ? crypto_init_shash_ops+0x60/0xf0
[  534.984213]  [<ffffffff81294e30>] crypto_init_shash_ops+0x60/0xf0
[  535.015025]  [<ffffffff8128c8ed>] __crypto_alloc_tfm+0x6d/0x170
[  535.045661]  [<ffffffff8128d12c>] crypto_alloc_base+0x4c/0xb0
[  535.074593]  [<ffffffff8160bd42>] ? _raw_spin_lock_bh+0x12/0x50
[  535.105239]  [<ffffffffa0418c11>] sctp_inet_listen+0x161/0x1e0 [sctp]
[  535.138606]  [<ffffffff814e43bd>] SyS_listen+0x9d/0xb0
[  535.166848]  [<ffffffff816149a9>] system_call_fastpath+0x16/0x1b

... or depending on the the application, for example this one:

[ 1370.026490] BUG: unable to handle kernel paging request at 00000000ffffffff
[ 1370.026506] IP: [<ffffffff811ab455>] kmem_cache_alloc+0x75/0x1d0
[ 1370.054568] PGD 633c94067 PUD 0
[ 1370.070446] Oops: 0000 [#1] SMP
[ 1370.085010] Modules linked in: sctp kvm_amd kvm [...]
[ 1370.963431] Call Trace:
[ 1370.974632]  [<ffffffff8120f7cf>] ? SyS_epoll_ctl+0x53f/0x960
[ 1371.000863]  [<ffffffff8120f7cf>] SyS_epoll_ctl+0x53f/0x960
[ 1371.027154]  [<ffffffff812100d3>] ? anon_inode_getfile+0xd3/0x170
[ 1371.054679]  [<ffffffff811e3d67>] ? __alloc_fd+0xa7/0x130
[ 1371.080183]  [<ffffffff816149a9>] system_call_fastpath+0x16/0x1b

With slab debugging enabled, we can see that the poison has been overwritten:

[  669.826368] BUG kmalloc-128 (Tainted: G        W     ): Poison overwritten
[  669.826385] INFO: 0xffff880228b32e50-0xffff880228b32e50. First byte 0x6a instead of 0x6b
[  669.826414] INFO: Allocated in sctp_auth_create_key+0x23/0x50 [sctp] age=3 cpu=0 pid=18494
[  669.826424]  __slab_alloc+0x4bf/0x566
[  669.826433]  __kmalloc+0x280/0x310
[  669.826453]  sctp_auth_create_key+0x23/0x50 [sctp]
[  669.826471]  sctp_auth_asoc_create_secret+0xcb/0x1e0 [sctp]
[  669.826488]  sctp_auth_asoc_init_active_key+0x68/0xa0 [sctp]
[  669.826505]  sctp_do_sm+0x29d/0x17c0 [sctp] [...]
[  669.826629] INFO: Freed in kzfree+0x31/0x40 age=1 cpu=0 pid=18494
[  669.826635]  __slab_free+0x39/0x2a8
[  669.826643]  kfree+0x1d6/0x230
[  669.826650]  kzfree+0x31/0x40
[  669.826666]  sctp_auth_key_put+0x19/0x20 [sctp]
[  669.826681]  sctp_assoc_update+0x1ee/0x2d0 [sctp]
[  669.826695]  sctp_do_sm+0x674/0x17c0 [sctp]

Since this only triggers in some collision-cases with AUTH, the problem at
heart is that sctp_auth_key_put() on asoc->asoc_shared_key is called twice
when having refcnt 1, once directly in sctp_assoc_update() and yet again
from within sctp_auth_asoc_init_active_key() via sctp_assoc_update() on
the already kzfree'd memory, which is also consistent with the observation
of the poison decrease from 0x6b to 0x6a (note: the overwrite is detected
at a later point in time when poison is checked on new allocation).

Reference counting of auth keys revisited:

Shared keys for AUTH chunks are being stored in endpoints and associations
in endpoint_shared_keys list. On endpoint creation, a null key is being
added; on association creation, all endpoint shared keys are being cached
and thus cloned over to the association. struct sctp_shared_key only holds
a pointer to the actual key bytes, that is, struct sctp_auth_bytes which
keeps track of users internally through refcounting. Naturally, on assoc
or enpoint destruction, sctp_shared_key are being destroyed directly and
the reference on sctp_auth_bytes dropped.

User space can add keys to either list via setsockopt(2) through struct
sctp_authkey and by passing that to sctp_auth_set_key() which replaces or
adds a new auth key. There, sctp_auth_create_key() creates a new sctp_auth_bytes
with refcount 1 and in case of replacement drops the reference on the old
sctp_auth_bytes. A key can be set active from user space through setsockopt()
on the id via sctp_auth_set_active_key(), which iterates through either
endpoint_shared_keys and in case of an assoc, invokes (one of various places)
sctp_auth_asoc_init_active_key().

sctp_auth_asoc_init_active_key() computes the actual secret from local's
and peer's random, hmac and shared key parameters and returns a new key
directly as sctp_auth_bytes, that is asoc->asoc_shared_key, plus drops
the reference if there was a previous one. The secret, which where we
eventually double drop the ref comes from sctp_auth_asoc_set_secret() with
intitial refcount of 1, which also stays unchanged eventually in
sctp_assoc_update(). This key is later being used for crypto layer to
set the key for the hash in crypto_hash_setkey() from sctp_auth_calculate_hmac().

To close the loop: asoc->asoc_shared_key is freshly allocated secret
material and independant of the sctp_shared_key management keeping track
of only shared keys in endpoints and assocs. Hence, also commit 4184b2a79a76
("net: sctp: fix memory leak in auth key management") is independant of
this bug here since it concerns a different layer (though same structures
being used eventually). asoc->asoc_shared_key is reference dropped correctly
on assoc destruction in sctp_association_free() and when active keys are
being replaced in sctp_auth_asoc_init_active_key(), it always has a refcount
of 1. Hence, it's freed prematurely in sctp_assoc_update(). Simple fix is
to remove that sctp_auth_key_put() from there which fixes these panics.

Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index f791edd64d6c..26d06dbcc1c8 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1182,7 +1182,6 @@ void sctp_assoc_update(struct sctp_association *asoc,
 	asoc->peer.peer_hmacs = new->peer.peer_hmacs;
 	new->peer.peer_hmacs = NULL;
 
-	sctp_auth_key_put(asoc->asoc_shared_key);
 	sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC);
 }
 
-- 
cgit v1.2.3-55-g7522


From df4d92549f23e1c037e83323aff58a21b3de7fe0 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa
Date: Fri, 23 Jan 2015 12:01:26 +0100
Subject: ipv4: try to cache dst_entries which would cause a redirect

Not caching dst_entries which cause redirects could be exploited by hosts
on the same subnet, causing a severe DoS attack. This effect aggravated
since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()").

Lookups causing redirects will be allocated with DST_NOCACHE set which
will force dst_release to free them via RCU.  Unfortunately waiting for
RCU grace period just takes too long, we can end up with >1M dst_entries
waiting to be released and the system will run OOM. rcuos threads cannot
catch up under high softirq load.

Attaching the flag to emit a redirect later on to the specific skb allows
us to cache those dst_entries thus reducing the pressure on allocation
and deallocation.

This issue was discovered by Marcelo Leitner.

Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Leitner <mleitner@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      | 11 ++++++-----
 net/ipv4/ip_forward.c |  3 ++-
 net/ipv4/route.c      |  9 +++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 0bb620702929..f7cbd703d15d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -39,11 +39,12 @@ struct inet_skb_parm {
 	struct ip_options	opt;		/* Compiled IP options		*/
 	unsigned char		flags;
 
-#define IPSKB_FORWARDED		1
-#define IPSKB_XFRM_TUNNEL_SIZE	2
-#define IPSKB_XFRM_TRANSFORMED	4
-#define IPSKB_FRAG_COMPLETE	8
-#define IPSKB_REROUTED		16
+#define IPSKB_FORWARDED		BIT(0)
+#define IPSKB_XFRM_TUNNEL_SIZE	BIT(1)
+#define IPSKB_XFRM_TRANSFORMED	BIT(2)
+#define IPSKB_FRAG_COMPLETE	BIT(3)
+#define IPSKB_REROUTED		BIT(4)
+#define IPSKB_DOREDIRECT	BIT(5)
 
 	u16			frag_max_size;
 };
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3a83ce5efa80..787b3c294ce6 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb)
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
 	 */
-	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb))
+	if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr &&
+	    !skb_sec_path(skb))
 		ip_rt_send_redirect(skb);
 
 	skb->priority = rt_tos2priority(iph->tos);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a2155b02602..d58dd0ec3e53 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1554,11 +1554,10 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	do_cache = res->fi && !itag;
 	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
+	    skb->protocol == htons(ETH_P_IP) &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
-	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
-		flags |= RTCF_DOREDIRECT;
-		do_cache = false;
-	}
+	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+		IPCB(skb)->flags |= IPSKB_DOREDIRECT;
 
 	if (skb->protocol != htons(ETH_P_IP)) {
 		/* Not IP (i.e. ARP). Do not create route, if it is
@@ -2303,6 +2302,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
+	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
+		r->rtm_flags |= RTCF_DOREDIRECT;
 
 	if (nla_put_be32(skb, RTA_DST, dst))
 		goto nla_put_failure;
-- 
cgit v1.2.3-55-g7522


From 86f3cddbc3037882414c7308973530167906b7e9 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Sat, 24 Jan 2015 08:02:40 +1100
Subject: udp_diag: Fix socket skipping within chain

While working on rhashtable walking I noticed that the UDP diag
dumping code is buggy.  In particular, the socket skipping within
a chain never happens, even though we record the number of sockets
that should be skipped.

As this code was supposedly copied from TCP, this patch does what
TCP does and resets num before we walk a chain.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_diag.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 7927db0a9279..4a000f1dd757 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin
 	s_slot = cb->args[0];
 	num = s_num = cb->args[1];
 
-	for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) {
+	for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct udp_hslot *hslot = &table->hash[slot];
 
+		num = 0;
+
 		if (hlist_nulls_empty(&hslot->head))
 			continue;
 
-- 
cgit v1.2.3-55-g7522


From fc752f1f43c1c038a2c6ae58cc739ebb5953ccb0 Mon Sep 17 00:00:00 2001
From: subashab@codeaurora.org
Date: Fri, 23 Jan 2015 22:26:02 +0000
Subject: ping: Fix race in free in receive path

An exception is seen in ICMP ping receive path where the skb
destructor sock_rfree() tries to access a freed socket. This happens
because ping_rcv() releases socket reference with sock_put() and this
internally frees up the socket. Later icmp_rcv() will try to free the
skb and as part of this, skb destructor is called and which leads
to a kernel panic as the socket is freed already in ping_rcv().

-->|exception
-007|sk_mem_uncharge
-007|sock_rfree
-008|skb_release_head_state
-009|skb_release_all
-009|__kfree_skb
-010|kfree_skb
-011|icmp_rcv
-012|ip_local_deliver_finish

Fix this incorrect free by cloning this skb and processing this cloned
skb instead.

This patch was suggested by Eric Dumazet

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c0d82f78d364..2a3720fb5a5f 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -966,8 +966,11 @@ bool ping_rcv(struct sk_buff *skb)
 
 	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
 	if (sk != NULL) {
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
 		pr_debug("rcv on socket %p\n", sk);
-		ping_queue_rcv_skb(sk, skb_get(skb));
+		if (skb2)
+			ping_queue_rcv_skb(sk, skb2);
 		sock_put(sk);
 		return true;
 	}
-- 
cgit v1.2.3-55-g7522


From 6e9e16e6143b725662e47026a1d0f270721cdd24 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa
Date: Mon, 26 Jan 2015 15:11:17 +0100
Subject: ipv6: replacing a rt6_info needs to purge possible propagated
 rt6_infos too

Lubomir Rintel reported that during replacing a route the interface
reference counter isn't correctly decremented.

To quote bug <https://bugzilla.kernel.org/show_bug.cgi?id=91941>:
| [root@rhel7-5 lkundrak]# sh -x lal
| + ip link add dev0 type dummy
| + ip link set dev0 up
| + ip link add dev1 type dummy
| + ip link set dev1 up
| + ip addr add 2001:db8:8086::2/64 dev dev0
| + ip route add 2001:db8:8086::/48 dev dev0 proto static metric 20
| + ip route add 2001:db8:8088::/48 dev dev1 proto static metric 10
| + ip route replace 2001:db8:8086::/48 dev dev1 proto static metric 20
| + ip link del dev0 type dummy
| Message from syslogd@rhel7-5 at Jan 23 10:54:41 ...
|  kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2
|
| Message from syslogd@rhel7-5 at Jan 23 10:54:51 ...
|  kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2

During replacement of a rt6_info we must walk all parent nodes and check
if the to be replaced rt6_info got propagated. If so, replace it with
an alive one.

Fixes: 4a287eba2de3957 ("IPv6 routing, NLM_F_* flag support: REPLACE and EXCL flags support, warn about missing CREATE flag")
Reported-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Tested-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b2d1838897c9..f1c6d5e98322 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -659,6 +659,29 @@ static int fib6_commit_metrics(struct dst_entry *dst,
 	return 0;
 }
 
+static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
+			  struct net *net)
+{
+	if (atomic_read(&rt->rt6i_ref) != 1) {
+		/* This route is used as dummy address holder in some split
+		 * nodes. It is not leaked, but it still holds other resources,
+		 * which must be released in time. So, scan ascendant nodes
+		 * and replace dummy references to this route with references
+		 * to still alive ones.
+		 */
+		while (fn) {
+			if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
+				fn->leaf = fib6_find_prefix(net, fn);
+				atomic_inc(&fn->leaf->rt6i_ref);
+				rt6_release(rt);
+			}
+			fn = fn->parent;
+		}
+		/* No more references are possible at this point. */
+		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
+	}
+}
+
 /*
  *	Insert routing information in a node.
  */
@@ -807,11 +830,12 @@ add:
 		rt->dst.rt6_next = iter->dst.rt6_next;
 		atomic_inc(&rt->rt6i_ref);
 		inet6_rt_notify(RTM_NEWROUTE, rt, info);
-		rt6_release(iter);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
 			fn->fn_flags |= RTN_RTINFO;
 		}
+		fib6_purge_rt(iter, fn, info->nl_net);
+		rt6_release(iter);
 	}
 
 	return 0;
@@ -1322,24 +1346,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 		fn = fib6_repair_tree(net, fn);
 	}
 
-	if (atomic_read(&rt->rt6i_ref) != 1) {
-		/* This route is used as dummy address holder in some split
-		 * nodes. It is not leaked, but it still holds other resources,
-		 * which must be released in time. So, scan ascendant nodes
-		 * and replace dummy references to this route with references
-		 * to still alive ones.
-		 */
-		while (fn) {
-			if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
-				fn->leaf = fib6_find_prefix(net, fn);
-				atomic_inc(&fn->leaf->rt6i_ref);
-				rt6_release(rt);
-			}
-			fn = fn->parent;
-		}
-		/* No more references are possible at this point. */
-		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
-	}
+	fib6_purge_rt(rt, fn, net);
 
 	inet6_rt_notify(RTM_DELROUTE, rt, info);
 	rt6_release(rt);
-- 
cgit v1.2.3-55-g7522


From 06539d3071067ff146a9bffd1c801fa56d290909 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 27 Jan 2015 12:25:33 -0800
Subject: net: don't OOPS on socket aio

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index a2c33a4dc7ba..418795caa897 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -869,9 +869,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 					 struct sock_iocb *siocb)
 {
-	if (!is_sync_kiocb(iocb))
-		BUG();
-
 	siocb->kiocb = iocb;
 	iocb->private = siocb;
 	return siocb;
-- 
cgit v1.2.3-55-g7522