From 3c5ab3f395d66a9e4e937fcfdf6ebc63894f028b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 29 Apr 2017 20:33:09 +0300 Subject: ipvs: SNAT packet replies only for NATed connections We do not check if packet from real server is for NAT connection before performing SNAT. This causes problems for setups that use DR/TUN and allow local clients to access the real server directly, for example: - local client in director creates IPVS-DR/TUN connection CIP->VIP and the request packets are routed to RIP. Talks are finished but IPVS connection is not expired yet. - second local client creates non-IPVS connection CIP->RIP with same reply tuple RIP->CIP and when replies are received on LOCAL_IN we wrongly assign them for the first client connection because RIP->CIP matches the reply direction. As result, IPVS SNATs replies for non-IPVS connections. The problem is more visible to local UDP clients but in rare cases it can happen also for TCP or remote clients when the real server sends the reply traffic via the director. So, better to be more precise for the reply traffic. As replies are not expected for DR/TUN connections, better to not touch them. Reported-by: Nick Moriarty Tested-by: Nick Moriarty Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d2d7bdf1d510..ad99c1ceea6f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* -- cgit v1.2.3-55-g7522 From a2b7cbdd2559aff06cebc28a7150f81c307a90d3 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 19 Apr 2017 11:39:20 -0700 Subject: netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch Not all parameters passed to ctnetlink_parse_tuple() and ctnetlink_exp_dump_tuple() match the enum type in the signatures of these functions. Since this is intended change the argument type of to be an unsigned integer value. Signed-off-by: Matthias Kaehlcke Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dcf561b5c97a..fa752626029e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1007,9 +1007,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num, - struct nf_conntrack_zone *zone) + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -2447,7 +2446,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) + u32 type) { struct nlattr *nest_parms; -- cgit v1.2.3-55-g7522 From d110a3942aca78d14929bc648aeb83ee0b245a61 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 6 May 2017 20:28:02 +0800 Subject: netfilter: don't setup nat info for confirmed ct We cannot setup nat info if the ct has been confirmed already, else, different cpu may race to handle the same ct. In extreme situation, we may hit the "BUG_ON(nf_nat_initialized(ct, maniptype))" in the nf_nat_setup_info. Also running the following commands will easily hit NF_CT_ASSERT in nf_conntrack_alter_reply: # nft flush ruleset # ping -c 2 -W 1 1.1.1.111 & # nft add table t # nft add chain t c {type nat hook postrouting priority 0 \;} # nft add rule t c snat to 4.5.6.7 WARNING: CPU: 1 PID: 10065 at net/netfilter/nf_conntrack_core.c:1472 nf_conntrack_alter_reply+0x9a/0x1a0 [nf_conntrack] [...] Call Trace: nf_nat_setup_info+0xad/0x840 [nf_nat] ? deactivate_slab+0x65d/0x6c0 nft_nat_eval+0xcd/0x100 [nft_nat] nft_do_chain+0xff/0x5d0 [nf_tables] ? mark_held_locks+0x6f/0xa0 ? __local_bh_enable_ip+0x70/0xa0 ? trace_hardirqs_on_caller+0x11f/0x190 ? ipt_do_table+0x310/0x610 ? trace_hardirqs_on+0xd/0x10 ? __local_bh_enable_ip+0x70/0xa0 ? ipt_do_table+0x32b/0x610 ? __lock_acquire+0x2ac/0x1580 ? ipt_do_table+0x32b/0x610 nft_nat_do_chain+0x65/0x80 [nft_chain_nat_ipv4] nf_nat_ipv4_fn+0x1ae/0x240 [nf_nat_ipv4] nf_nat_ipv4_out+0x4a/0xf0 [nf_nat_ipv4] nft_nat_ipv4_out+0x15/0x20 [nft_chain_nat_ipv4] nf_hook_slow+0x2c/0xf0 ip_output+0x154/0x270 So for the confirmed ct, just ignore it and return NF_ACCEPT. Fixes: 9a08ecfe74d7 ("netfilter: don't attach a nat extension by default") Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index b48d6b5aae8a..ef0be325a0c6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct, { struct nf_conntrack_tuple curr_tuple, new_tuple; + /* Can't setup nat info for confirmed ct. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); -- cgit v1.2.3-55-g7522 From d91fc59cd77c719f33eda65c194ad8f95a055190 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 7 May 2017 22:01:55 +0800 Subject: netfilter: introduce nf_conntrack_helper_put helper function And convert module_put invocation to nf_conntrack_helper_put, this is prepared for the followup patch, which will add a refcnt for cthelper, so we can reject the deleting request when cthelper is in use. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 2 ++ net/netfilter/nf_conntrack_helper.c | 6 ++++++ net/netfilter/nft_ct.c | 4 ++-- net/netfilter/xt_CT.c | 6 +++--- net/openvswitch/conntrack.c | 4 ++-- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e04fa7691e5d..c1c12411103a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -79,6 +79,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a60efa7799b..e17006b6e434 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -181,6 +181,12 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) +{ + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a34ceb38fc55..1678e9e75e8e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj) struct nft_ct_helper_obj *priv = nft_obj_data(obj); if (priv->helper4) - module_put(priv->helper4->me); + nf_conntrack_helper_put(priv->helper4); if (priv->helper6) - module_put(priv->helper6->me); + nf_conntrack_helper_put(priv->helper6); } static void nft_ct_helper_obj_eval(struct nft_object *obj, diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bb7ad82dcd56..623ef37de886 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); if (help == NULL) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -263,7 +263,7 @@ out: err4: help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); err3: nf_ct_tmpl_free(ct); err2: @@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (ct) { help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); nf_ct_netns_put(par->net, par->family); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bf602e33c40a..08679ebb3068 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); if (!help) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) - module_put(ct_info->helper->me); + nf_conntrack_helper_put(ct_info->helper); if (ct_info->ct) nf_ct_tmpl_free(ct_info->ct); } -- cgit v1.2.3-55-g7522 From 9338d7b4418e9996a7642867d8f6b482a6040ed6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 7 May 2017 22:01:56 +0800 Subject: netfilter: nfnl_cthelper: reject del request if helper obj is in use We can still delete the ct helper even if it is in use, this will cause a use-after-free error. In more detail, I mean: # nfct helper add ssdp inet udp # iptables -t raw -A OUTPUT -p udp -j CT --helper ssdp # nfct helper delete ssdp //--> oops, succeed! BUG: unable to handle kernel paging request at 000026ca IP: 0x26ca [...] Call Trace: ? ipv4_helper+0x62/0x80 [nf_conntrack_ipv4] nf_hook_slow+0x21/0xb0 ip_output+0xe9/0x100 ? ip_fragment.constprop.54+0xc0/0xc0 ip_local_out+0x33/0x40 ip_send_skb+0x16/0x80 udp_send_skb+0x84/0x240 udp_sendmsg+0x35d/0xa50 So add reference count to fix this issue, if ct helper is used by others, reject the delete request. Apply this patch: # nfct helper delete ssdp nfct v1.4.3: netlink error: Device or resource busy Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 2 ++ net/netfilter/nf_conntrack_helper.c | 6 ++++++ net/netfilter/nfnetlink_cthelper.c | 17 +++++++++++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index c1c12411103a..c519bb5b5bb8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -9,6 +9,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H +#include #include #include #include @@ -26,6 +27,7 @@ struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e17006b6e434..7f6100ca63be 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + module_put(h->me); + h = NULL; + } rcu_read_unlock(); @@ -183,6 +187,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { + refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); @@ -423,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } } } + refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 950bf6eadc65..be678a323598 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } + ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; @@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); + if (refcount_dec_if_one(&cur->refcnt)) { + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); - list_del(&nlcth->list); - kfree(nlcth); + list_del(&nlcth->list); + kfree(nlcth); + } else { + ret = -EBUSY; + } } /* Make sure we return success if we flush and there is no helpers */ - return (found || j == 0) ? 0 : -ENOENT; + return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { -- cgit v1.2.3-55-g7522 From 324318f0248c31be8a08984146e7e4dd7cdd091d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 9 May 2017 16:17:37 -0400 Subject: netfilter: xtables: zero padding in data_to_user When looking up an iptables rule, the iptables binary compares the aligned match and target data (XT_ALIGN). In some cases this can exceed the actual data size to include padding bytes. Before commit f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") the malloc()ed bytes were overwritten by the kernel with kzalloced contents, zeroing the padding and making the comparison succeed. After this patch, the kernel copies and clears only data, leaving the padding bytes undefined. Extend the clear operation from data size to aligned data size to include the padding bytes, if any. Padding bytes can be observed in both match and target, and the bug triggered, by issuing a rule with match icmp and target ACCEPT: iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") Reported-by: Paul Moore Reported-by: Richard Guy Briggs Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- net/bridge/netfilter/ebtables.c | 9 ++++++--- net/netfilter/x_tables.c | 9 ++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be378cf47fcc..b3044c2c62cb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m, int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size); + int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9ec0c9f908fa..9c6e619f452b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name, strlcpy(name, _name, sizeof(name)); if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || - xt_data_to_user(um + entrysize, data, usersize, datasize)) + xt_data_to_user(um + entrysize, data, usersize, datasize, + XT_ALIGN(datasize))) return -EFAULT; return 0; @@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user(cm->data, m->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) + if (xt_data_to_user(cm->data, m->data, match->usersize, msize, + COMPAT_XT_ALIGN(msize))) return -EFAULT; } @@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user(cm->data, t->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, + COMPAT_XT_ALIGN(tsize))) return -EFAULT; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8876b7da6884..d17769599c10 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -283,12 +283,13 @@ static int xt_obj_to_user(u16 __user *psize, u16 size, &U->u.user.revision, K->u.kernel.TYPE->revision) int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size) + int usersize, int size, int aligned_size) { usersize = usersize ? : size; if (copy_to_user(dst, src, usersize)) return -EFAULT; - if (usersize != size && clear_user(dst + usersize, size - usersize)) + if (usersize != aligned_size && + clear_user(dst + usersize, aligned_size - usersize)) return -EFAULT; return 0; @@ -298,7 +299,9 @@ EXPORT_SYMBOL_GPL(xt_data_to_user); #define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? : K->u.kernel.TYPE->TYPE##size) + C_SIZE ? : K->u.kernel.TYPE->TYPE##size, \ + C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) : \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) -- cgit v1.2.3-55-g7522 From 87e94dbc210a720a34be5c1174faee5c84be963e Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 11 May 2017 18:56:38 +0200 Subject: netfilter: synproxy: fix conntrackd interaction This patch fixes the creation of connection tracking entry from netlink when synproxy is used. It was missing the addition of the synproxy extension. This was causing kernel crashes when a conntrack entry created by conntrackd was used after the switch of traffic from active node to the passive node. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fa752626029e..9799a50bc604 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1827,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); nf_ct_labels_ext_add(ct); + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; -- cgit v1.2.3-55-g7522 From fa803605eef39372e53d7813002d73a3fcf10c88 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 14 May 2017 21:35:22 +0800 Subject: netfilter: nf_tables: can't assume lock is acquired when dumping set elems When dumping the elements related to a specified set, we may invoke the nf_tables_dump_set with the NFNL_SUBSYS_NFTABLES lock not acquired. So we should use the proper rcu operation to avoid race condition, just like other nft dump operations. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 78 +++++++++++++++++++++++++++++++------------ net/netfilter/nft_set_hash.c | 2 +- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 559225029740..5f4a4d48b871 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, return nf_tables_fill_setelem(args->skb, set, elem); } +struct nft_set_dump_ctx { + const struct nft_set *set; + struct nft_ctx ctx; +}; + static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { + struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - u8 genmask = nft_genmask_cur(net); + struct nft_af_info *afi; + struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; - struct nft_ctx ctx; - struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + bool set_found = false; struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; - int event, err; + int event; - err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, - NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy, - NULL); - if (err < 0) - return err; + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (afi != dump_ctx->ctx.afi) + continue; - err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, genmask); - if (err < 0) - return err; + list_for_each_entry_rcu(table, &afi->tables, list) { + if (table != dump_ctx->ctx.table) + continue; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; + } + } + break; + } + break; + } + + if (!set_found) { + rcu_read_unlock(); + return -ENOENT; + } event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; @@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx.afi->family; + nfmsg->nfgen_family = afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; @@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.cb = cb; args.skb = skb; - args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&dump_ctx->ctx, set, &args.iter); + rcu_read_unlock(); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; nla_put_failure: + rcu_read_unlock(); return -ENOSPC; } +static int nf_tables_dump_set_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; return netlink_dump_start(nlsk, skb, nlh, &c); } return -EOPNOTSUPP; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 8ec086b6b56b..3d3a6df4ce70 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; int err; - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC); iter->err = err; if (err) return; -- cgit v1.2.3-55-g7522 From 71df14b0ce094be46d105b5a3ededd83b8e779a0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 May 2017 11:17:29 +0100 Subject: netfilter: nf_tables: missing sanitization in data from userspace Do not assume userspace always sends us NFT_DATA_VALUE for bitwise and cmp expressions. Although NFT_DATA_VERDICT does not make any sense, it is still possible to handcraft a netlink message using this incorrect data type. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 19 ++++++++++++++----- net/netfilter/nft_cmp.c | 12 ++++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 877d9acd91ef..96bd4f325b0f 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.len != priv->len) - return -EINVAL; + if (d1.len != priv->len) { + err = -EINVAL; + goto err1; + } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, tb[NFTA_BITWISE_XOR]); if (err < 0) - return err; - if (d2.len != priv->len) - return -EINVAL; + goto err1; + if (d2.len != priv->len) { + err = -EINVAL; + goto err2; + } return 0; +err2: + nft_data_uninit(&priv->xor, d2.type); +err1: + nft_data_uninit(&priv->mask, d1.type); + return err; } static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2b96effeadc1..8c9d0fb19118 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (err < 0) return ERR_PTR(err); + if (desc.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err1; + } + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) return &nft_cmp_fast_ops; - else - return &nft_cmp_ops; + + return &nft_cmp_ops; +err1: + nft_data_uninit(&data, desc.type); + return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { -- cgit v1.2.3-55-g7522 From 591054469b3eef34bc097c30fae8ededddf8d796 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 May 2017 11:17:34 +0100 Subject: netfilter: nf_tables: revisit chain/object refcounting from elements Andreas reports that the following incremental update using our commit protocol doesn't work. # nft -f incremental-update.nft delete element ip filter client_to_any { 10.180.86.22 : goto CIn_1 } delete chain ip filter CIn_1 ... Error: Could not process rule: Device or resource busy The existing code is not well-integrated into the commit phase protocol, since element deletions do not result in refcount decrement from the preparation phase. This results in bogus EBUSY errors like the one above. Two new functions come with this patch: * nft_set_elem_activate() function is used from the abort path, to restore the set element refcounting on objects that occurred from the preparation phase. * nft_set_elem_deactivate() that is called from nft_del_setelem() to decrement set element refcounting on objects from the preparation phase in the commit protocol. The nft_data_uninit() has been renamed to nft_data_release() since this function does not uninitialize any data store in the data register, instead just releases the references to objects. Moreover, a new function nft_data_hold() has been introduced to be used from nft_set_elem_activate(). Reported-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 82 ++++++++++++++++++++++++++++++++++----- net/netfilter/nft_bitwise.c | 4 +- net/netfilter/nft_cmp.c | 2 +- net/netfilter/nft_immediate.c | 5 ++- net/netfilter/nft_range.c | 4 +- 6 files changed, 81 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 028faec8fc27..8a8bab8d7b15 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -176,7 +176,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5f4a4d48b871..da314be0c048 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3627,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); - nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), set->dtype); + nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) @@ -3638,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +/* Only called from commit path, nft_set_elem_deactivate() already deals with + * the refcounting from the preparation phase. + */ +static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + kfree(elem); +} + static int nft_setelem_parse_flags(const struct nft_set *set, const struct nlattr *attr, u32 *flags) { @@ -3849,9 +3861,9 @@ err4: kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&data, d2.type); + nft_data_release(&data, d2.type); err2: - nft_data_uninit(&elem.key.val, d1.type); + nft_data_release(&elem.key.val, d1.type); err1: return err; } @@ -3896,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } +/** + * nft_data_hold - hold a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and + * NFT_GOTO verdicts. This function must be called on active data objects + * from the second phase of the commit protocol. + */ +static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +{ + if (type == NFT_DATA_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + data->verdict.chain->use++; + break; + } + } +} + +static void nft_set_elem_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_hold(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use++; +} + +static void nft_set_elem_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_release(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; +} + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3961,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; + nft_set_elem_deactivate(ctx->net, set, &elem); + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -3970,7 +4031,7 @@ err4: err3: kfree(elem.priv); err2: - nft_data_uninit(&elem.key.val, desc.type); + nft_data_release(&elem.key.val, desc.type); err1: return err; } @@ -4777,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); @@ -5013,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nft_set_elem_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; @@ -5498,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx, EXPORT_SYMBOL_GPL(nft_data_init); /** - * nft_data_uninit - release a nft_data item + * nft_data_release - release a nft_data item * * @data: struct nft_data to release * @type: type of data @@ -5506,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init); * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, * all others need to be released by calling this function. */ -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +void nft_data_release(const struct nft_data *data, enum nft_data_types type) { if (type < NFT_DATA_VERDICT) return; @@ -5517,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } -EXPORT_SYMBOL_GPL(nft_data_uninit); +EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 96bd4f325b0f..fff8073e2a56 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -99,9 +99,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, return 0; err2: - nft_data_uninit(&priv->xor, d2.type); + nft_data_release(&priv->xor, d2.type); err1: - nft_data_uninit(&priv->mask, d1.type); + nft_data_release(&priv->mask, d1.type); return err; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 8c9d0fb19118..c2945eb3397c 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -211,7 +211,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp_ops; err1: - nft_data_uninit(&data, desc.type); + nft_data_release(&data, desc.type); return ERR_PTR(-EINVAL); } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 728baf88295a..4717d7796927 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, return 0; err1: - nft_data_uninit(&priv->data, desc.type); + nft_data_release(&priv->data, desc.type); return err; } @@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); + + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9edc74eedc10..cedb96c3619f 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr priv->len = desc_from.len; return 0; err2: - nft_data_uninit(&priv->data_to, desc_to.type); + nft_data_release(&priv->data_to, desc_to.type); err1: - nft_data_uninit(&priv->data_from, desc_from.type); + nft_data_release(&priv->data_from, desc_from.type); return err; } -- cgit v1.2.3-55-g7522 From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 16 May 2017 09:30:18 +0800 Subject: ebtables: arpreply: Add the standard target sanity check The info->target comes from userspace and it would be used directly. So we need to add the sanity check to make sure it is a valid standard target, although the ebtables tool has already checked it. Kernel needs to validate anything coming from userspace. If the target is set as an evil value, it would break the ebtables and cause a panic. Because the non-standard target is treated as one offset. Now add one helper function ebt_invalid_target, and we would replace the macro INVALID_TARGET later. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 +++++ net/bridge/netfilter/ebt_arpreply.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a30efb437e6d..e0cbf17af780 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 5929309beaa1..db85230e49c3 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; } -- cgit v1.2.3-55-g7522 From 751a9c763849f5859cb69ea44b0430d00672f637 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 17 May 2017 11:24:47 -0400 Subject: netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT The patch in the Fixes references COMPAT_XT_ALIGN in the definition of XT_DATA_TO_USER, outside an #ifdef CONFIG_COMPAT block. Split XT_DATA_TO_USER into separate compat and non compat variants and define the first inside an CONFIG_COMPAT block. This simplifies both variants by removing branches inside the macro. Fixes: 324318f0248c ("netfilter: xtables: zero padding in data_to_user") Reported-by: Stephen Rothwell Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d17769599c10..1770c1d9b37f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -296,18 +296,17 @@ int xt_data_to_user(void __user *dst, const void *src, } EXPORT_SYMBOL_GPL(xt_data_to_user); -#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ +#define XT_DATA_TO_USER(U, K, TYPE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? : K->u.kernel.TYPE->TYPE##size, \ - C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) : \ - XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) + K->u.kernel.TYPE->TYPE##size, \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) { return XT_OBJ_TO_USER(u, m, match, 0) || - XT_DATA_TO_USER(u, m, match, 0); + XT_DATA_TO_USER(u, m, match); } EXPORT_SYMBOL_GPL(xt_match_to_user); @@ -315,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u) { return XT_OBJ_TO_USER(u, t, target, 0) || - XT_DATA_TO_USER(u, t, target, 0); + XT_DATA_TO_USER(u, t, target); } EXPORT_SYMBOL_GPL(xt_target_to_user); @@ -614,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); +#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE, \ + COMPAT_XT_ALIGN(C_SIZE)) + int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { @@ -629,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) + if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -975,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) + if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } -- cgit v1.2.3-55-g7522 From 499350a5a6e7512d9ed369ed63a4244b6536f4f8 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 May 2017 11:22:33 -0700 Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 When tcp_disconnect() is called, inet_csk_delack_init() sets icsk->icsk_ack.rcv_mss to 0. This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => __tcp_select_window() call path to have division by 0 issue. So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1e4c76d2b827..842b575f8fdd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2320,6 +2320,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); -- cgit v1.2.3-55-g7522 From 34eb5fe07831458cf8238d54c1fc847dedeaf68c Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:18 -0700 Subject: arp: fixed error in a comment the is_garp code deals just with gratuitous ARP packets, not every unsolicited packet. This patch is a result of a discussion in netdev: http://marc.info/?l=linux-netdev&m=149506354216994 Suggested-by: Julian Anastasov Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d54345a06f72..053492af8a6e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -846,7 +846,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) */ is_garp = tip == sip && addr_type == RTN_UNICAST; - /* Unsolicited ARP _replies_ also require target hwaddr to be + /* Gratuitous ARP _replies_ also require target hwaddr to be * the same as source. */ if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) -- cgit v1.2.3-55-g7522 From 6fd05633bdafc0ae6ec0d55e61af10780d4d3530 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:19 -0700 Subject: arp: decompose is_garp logic into a separate function The code is quite involving already to earn a separate function for itself. If anything, it helps arp_process readability. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 053492af8a6e..ca6e1e6c1496 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,6 +641,27 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); +static bool arp_is_garp(struct net_device *dev, int addr_type, + __be16 ar_op, + __be32 sip, __be32 tip, + unsigned char *sha, unsigned char *tha) +{ + bool is_garp = tip == sip && addr_type == RTN_UNICAST; + + /* Gratuitous ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); + + return is_garp; +} + /* * Process an arp request. */ @@ -844,18 +865,8 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = tip == sip && addr_type == RTN_UNICAST; - - /* Gratuitous ARP _replies_ also require target hwaddr to be - * the same as source. - */ - if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) - is_garp = - /* IPv4 over IEEE 1394 doesn't provide target - * hardware address field in its ARP payload. - */ - tha && - !memcmp(tha, sha, dev->addr_len); + is_garp = arp_is_garp(dev, addr_type, arp->ar_op, + sip, tip, sha, tha); if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && -- cgit v1.2.3-55-g7522 From d9ef2e7bf99f59179b89d5c1c4d5b4919375daee Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:20 -0700 Subject: arp: postpone addr_type calculation to as late as possible The addr_type retrieval can be costly, so it's worth trying to avoid its calculation as much as possible. This patch makes it calculated only for gratuitous ARP packets. This is especially important since later we may want to move is_garp calculation outside of arp_accept block, at which point the costly operation will be executed for all setups. The patch is the result of a discussion in net-dev: http://marc.info/?l=linux-netdev&m=149506354216994 Suggested-by: Julian Anastasov Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ca6e1e6c1496..c22103cec823 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,12 +641,12 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); -static bool arp_is_garp(struct net_device *dev, int addr_type, - __be16 ar_op, +static bool arp_is_garp(struct net *net, struct net_device *dev, + int *addr_type, __be16 ar_op, __be32 sip, __be32 tip, unsigned char *sha, unsigned char *tha) { - bool is_garp = tip == sip && addr_type == RTN_UNICAST; + bool is_garp = tip == sip; /* Gratuitous ARP _replies_ also require target hwaddr to be * the same as source. @@ -659,6 +659,11 @@ static bool arp_is_garp(struct net_device *dev, int addr_type, tha && !memcmp(tha, sha, dev->addr_len); + if (is_garp) { + *addr_type = inet_addr_type_dev_table(net, dev, sip); + if (*addr_type != RTN_UNICAST) + is_garp = false; + } return is_garp; } @@ -859,18 +864,23 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); if (IN_DEV_ARP_ACCEPT(in_dev)) { - unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip); + addr_type = -1; /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp_is_garp(dev, addr_type, arp->ar_op, + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, sip, tip, sha, tha); if (!n && - ((arp->ar_op == htons(ARPOP_REPLY) && - addr_type == RTN_UNICAST) || is_garp)) + (is_garp || + (arp->ar_op == htons(ARPOP_REPLY) && + (addr_type == RTN_UNICAST || + (addr_type < 0 && + /* postpone calculation to as late as possible */ + inet_addr_type_dev_table(net, dev, sip) == + RTN_UNICAST))))) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } -- cgit v1.2.3-55-g7522 From 7d472a59c0e5ec117220a05de6b370447fb6cb66 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:21 -0700 Subject: arp: always override existing neigh entries with gratuitous ARP Currently, when arp_accept is 1, we always override existing neigh entries with incoming gratuitous ARP replies. Otherwise, we override them only if new replies satisfy _locktime_ conditional (packets arrive not earlier than _locktime_ seconds since the last update to the neigh entry). The idea behind locktime is to pick the very first (=> close) reply received in a unicast burst when ARP proxies are used. This helps to avoid ARP thrashing where Linux would switch back and forth from one proxy to another. This logic has nothing to do with gratuitous ARP replies that are generally not aligned in time when multiple IP address carriers send them into network. This patch enforces overriding of existing neigh entries by all incoming gratuitous ARP packets, irrespective of their time of arrival. This will make the kernel honour all incoming gratuitous ARP packets. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c22103cec823..ae96e6f3e0cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -863,16 +863,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IN_DEV_ARP_ACCEPT(in_dev)) { + if (n || IN_DEV_ARP_ACCEPT(in_dev)) { addr_type = -1; + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, + sip, tip, sha, tha); + } + if (IN_DEV_ARP_ACCEPT(in_dev)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, - sip, tip, sha, tha); - if (!n && (is_garp || (arp->ar_op == htons(ARPOP_REPLY) && -- cgit v1.2.3-55-g7522 From fe0cd8ca1b82983db24b173bb8518ea646c02d25 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Fri, 19 May 2017 14:00:25 +0000 Subject: smsc95xx: Support only IPv4 TCP/UDP csum offload When TX checksum offload is used, if the computed checksum is 0 the LAN95xx device do not alter the checksum to 0xffff. In the case of ipv4 UDP checksum, it indicates to receiver that no checksum is calculated. Under ipv6, UDP checksum yields a result of zero must be changed to 0xffff. Hence disabling checksum offload for ipv6 packets. Signed-off-by: Nisar Sayed Reported-by: popcorn mix Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 765400b62168..2dfca96a63b6 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev, if (ret < 0) return ret; - if (features & NETIF_F_HW_CSUM) + if (features & NETIF_F_IP_CSUM) read_buf |= Tx_COE_EN_; else read_buf &= ~Tx_COE_EN_; @@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) spin_lock_init(&pdata->mac_cr_lock); + /* LAN95xx devices do not alter the computed checksum of 0 to 0xffff. + * RFC 2460, ipv6 UDP calculated checksum yields a result of zero must + * be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero, + * it is transmitted as all ones. The zero transmitted checksum means + * transmitter generated no checksum. Hence, enable csum offload only + * for ipv4 packets. + */ if (DEFAULT_TX_CSUM_ENABLE) - dev->net->features |= NETIF_F_HW_CSUM; + dev->net->features |= NETIF_F_IP_CSUM; if (DEFAULT_RX_CSUM_ENABLE) dev->net->features |= NETIF_F_RXCSUM; - dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; smsc95xx_init_mac_address(dev); -- cgit v1.2.3-55-g7522 From 6d18c732b95c0a9d35e9f978b4438bba15412284 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 19 May 2017 22:20:29 +0800 Subject: bridge: start hello_timer when enabling KERNEL_STP in br_stp_start Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers"), bridge would not start hello_timer if stp_enabled is not KERNEL_STP when br_dev_open. The problem is even if users set stp_enabled with KERNEL_STP later, the timer will still not be started. It causes that KERNEL_STP can not really work. Users have to re-ifup the bridge to avoid this. This patch is to fix it by starting br->hello_timer when enabling KERNEL_STP in br_stp_start. As an improvement, it's also to start hello_timer again only when br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is no reason to start the timer again when it's NO_STP. Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers") Reported-by: Haidong Li Signed-off-by: Xin Long Acked-by: Nikolay Aleksandrov Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- net/bridge/br_stp_if.c | 1 + net/bridge/br_stp_timer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 08341d2aa9c9..0db8102995a5 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c98b3e5c140a..60b6fe277a8b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } -- cgit v1.2.3-55-g7522 From 232cd35d0804cc241eb887bb8d4d9b3b9881c64a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 May 2017 14:17:48 -0700 Subject: ipv6: fix out of bound writes in __ip6_append_data() Andrey Konovalov and idaifish@gmail.com reported crashes caused by one skb shared_info being overwritten from __ip6_append_data() Andrey program lead to following state : copy -4200 datalen 2000 fraglen 2040 maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200 The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); is overwriting skb->head and skb_shared_info Since we apparently detect this rare condition too late, move the code earlier to even avoid allocating skb and risking crashes. Once again, many thanks to Andrey and syzkaller team. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Reported-by: Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d4a31becbd25..bf8a58a1c32d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1466,6 +1466,11 @@ alloc_new_skb: */ alloclen += sizeof(struct frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1515,13 +1520,9 @@ alloc_new_skb: data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; -- cgit v1.2.3-55-g7522 From bd703a1524e851b8d6d646be9dafd794d4eb6045 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 20 May 2017 01:52:11 +0300 Subject: net: atheros: atl2: don't return zero on failure path in atl2_probe() If dma mask checks fail in atl2_probe(), it breaks off initialization, deallocates all resources, but returns zero. The patch adds proper error code return value and make error code setup unified. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atlx/atl2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 63f2deec2a52..77a1c03255de 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) && pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n"); + err = -EIO; goto err_dma; } @@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * pcibios_set_master to do the needed arch specific settings */ pci_set_master(pdev); - err = -ENOMEM; netdev = alloc_etherdev(sizeof(struct atl2_adapter)); - if (!netdev) + if (!netdev) { + err = -ENOMEM; goto err_alloc_etherdev; + } SET_NETDEV_DEV(netdev, &pdev->dev); @@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; - err = -EIO; - netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX; netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); -- cgit v1.2.3-55-g7522 From 751da2a69b7cc82d83dc310ed7606225f2d6e014 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 May 2017 19:43:45 -0400 Subject: bonding: fix accounting of active ports in 3ad As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not removed from the aggregator when they are down, and the active slave count is NOT equal to number of ports in the aggregator, but rather the number of ports in the aggregator that are still enabled. The sysfs spew for bonding_show_ad_num_ports() has a comment that says "Show number of active 802.3ad ports.", but it's currently showing total number of ports, both active and inactive. Remedy it by using the same logic introduced in 0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and netlink all report the number of active ports. Note that this means that IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of NUM_PORTS, and thus perhaps should be renamed for clarity. Lightly tested on a dual i40e lacp bond, simulating link downs with an ip link set dev down, was able to produce the state where I could see both in the same aggregator, but a number of ports count of 1. MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 2 <--- Slave Interface: ens10 MII Status: up <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 1 <--- Slave Interface: ens10 MII Status: down <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c5fd4259da33..b44a6aeb346d 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; - ad_info->ports = aggregator->num_of_ports; + ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, -- cgit v1.2.3-55-g7522 From e4eda884db7930cee434828759064b4711604078 Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Mon, 22 May 2017 12:27:07 -0400 Subject: net: Make IP alignment calulations clearer. The assignmnet: ip_align = strict ? 2 : NET_IP_ALIGN; in compare_pkt_ptr_alignment() trips up Coverity because we can only get to this code when strict is true, therefore ip_align will always be 2 regardless of NET_IP_ALIGN's value. So just assign directly to '2' and explain the situation in the comment above. Reported-by: "Gustavo A. R. Silva" Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1eddb713b815..c72cd41f5b8b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -808,11 +808,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking - * we force this to 2 which is universally what architectures use - * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + /* For platforms that do not have a Kconfig enabling + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of + * NET_IP_ALIGN is universally set to '2'. And on platforms + * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get + * to this code only in strict mode where we want to emulate + * the NET_IP_ALIGN==2 checking. Therefore use an + * unconditional IP align value of '2'. */ - ip_align = strict ? 2 : NET_IP_ALIGN; + ip_align = 2; if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", ip_align, reg_off, off, size); -- cgit v1.2.3-55-g7522 From 72ccc471e13b8266d2ee2104521df5b92ba08e9c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 May 2017 14:46:46 -0400 Subject: bonding: fix randomly populated arp target array In commit dc9c4d0fe023, the arp_target array moved from a static global to a local variable. By the nature of static globals, the array used to be initialized to all 0. At present, it's full of random data, which that gets interpreted as arp_target values, when none have actually been specified. Systems end up booting with spew along these lines: [ 32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0 [ 32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.204892] lacp0: Setting MII monitoring interval to 100 [ 32.211071] lacp0: Removing ARP target 216.124.228.17 [ 32.216824] lacp0: Removing ARP target 218.160.255.255 [ 32.222646] lacp0: Removing ARP target 185.170.136.184 [ 32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal [ 32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255) [ 32.243987] lacp0: Removing ARP target 56.125.228.17 [ 32.249625] lacp0: Removing ARP target 218.160.255.255 [ 32.255432] lacp0: Removing ARP target 15.157.233.184 [ 32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal [ 32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255) [ 32.276632] lacp0: Removing ARP target 16.0.0.0 [ 32.281755] lacp0: Removing ARP target 218.160.255.255 [ 32.287567] lacp0: Removing ARP target 72.125.228.17 [ 32.293165] lacp0: Removing ARP target 218.160.255.255 [ 32.298970] lacp0: Removing ARP target 8.125.228.17 [ 32.304458] lacp0: Removing ARP target 218.160.255.255 None of these were actually specified as ARP targets, and the driver does seem to clean up the mess okay, but it's rather noisy and confusing, leaks values to userspace, and the 255.255.255.255 spew shows up even when debug prints are disabled. The fix: just zero out arp_target at init time. While we're in here, init arp_all_targets_value in the right place. Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables") CC: Mahesh Bandewar CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Jarod Wilson Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2be78807fd6e..73313318399c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params) int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; struct bond_opt_value newval; const struct bond_opt_value *valptr; - int arp_all_targets_value; + int arp_all_targets_value = 0; u16 ad_actor_sys_prio = 0; u16 ad_user_port_key = 0; - __be32 arp_target[BOND_MAX_ARP_TARGETS]; + __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 }; int arp_ip_count; int bond_mode = BOND_MODE_ROUNDROBIN; int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; @@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params) arp_validate_value = 0; } - arp_all_targets_value = 0; if (arp_all_targets) { bond_opt_initstr(&newval, arp_all_targets); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), -- cgit v1.2.3-55-g7522 From 499fde662f1957e3cb8d192a94a099ebe19c714b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 19 May 2017 11:21:59 -0700 Subject: vsock: use new wait API for vsock_stream_sendmsg() As reported by Michal, vsock_stream_sendmsg() could still sleep at vsock_stream_has_space() after prepare_to_wait(): vsock_stream_has_space vmci_transport_stream_has_space vmci_qpair_produce_free_space qp_lock qp_acquire_queue_mutex mutex_lock Just switch to the new wait API like we did for commit d9dc8b0f8b4e ("net: fix sleeping for sk_wait_event()"). Reported-by: Michal Kubecek Cc: Stefan Hajnoczi Cc: Jorgen Hansen Cc: "Michael S. Tsirkin" Cc: Claudio Imbrenda Signed-off-by: Cong Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6f7f6757ceef..dfc8c51e4d74 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, long timeout; int err; struct vsock_transport_send_notify_data send_data; - - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - while (total_written < len) { ssize_t written; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); if (err < 0) { - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } release_sock(sk); - timeout = schedule_timeout(timeout); + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } else if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after -- cgit v1.2.3-55-g7522 From 2d76b2f8b54abd16225cd80afca36ed43f113c41 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 22 May 2017 16:46:13 +0200 Subject: net: sched: cls_matchall: fix null pointer dereference Since the head is guaranteed by the check above to be null, the call_rcu would explode. Remove the previously logically dead code that was made logically very much alive and kicking. Fixes: 985538eee06f ("net/sched: remove redundant null check on head") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index dee469fed967..51859b8edd7e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); - call_rcu(&head->rcu, mall_destroy_rcu); return 0; err_replace_hw_filter: -- cgit v1.2.3-55-g7522