diff options
Diffstat (limited to 'net')
202 files changed, 2187 insertions, 1073 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 15293c2a5dd8..8d77b6ee4477 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -443,27 +443,29 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) return rc; } -static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = -EINVAL; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); - if (ops->ndo_fcoe_get_wwn) - rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } +#endif -static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) +#ifdef NETDEV_FCOE_WWNN +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = 0; - - if (ops->ndo_fcoe_ddp_target) - rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + int rc = -EINVAL; + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } #endif @@ -794,9 +796,11 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif +#ifdef NETDEV_FCOE_WWNN + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, +#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = vlan_dev_poll_controller, .ndo_netpoll_setup = vlan_dev_netpoll_setup, diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 49a16cee2aae..420a98bf79b5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } timer_setup(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 795fbc6c06aa..dbe8b1993be9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1921,17 +1918,23 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1945,11 +1948,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594..ad4f829193f0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index a9b7919c9de5..d5df0114f08a 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); - /* free the TID stats immediately */ - cfg80211_sinfo_release_content(&sinfo); + if (!ret) { + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + } dev_put(real_netdev); if (ret == -ENOENT) { diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ef39aabdb694..4fb01108e5f5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0b4b3fb778a6..208655cf6717 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1116,9 +1116,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; u32 tp_override; u32 old_tp_override; bool ret; @@ -1147,7 +1147,10 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); - batadv_netlink_notify_hardif(bat_priv, hard_iface); + if (hard_iface->soft_iface) { + bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + } out: batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f73d79139ae7..26c4e2493ddf 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** @@ -1337,9 +1349,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1368,15 +1381,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 78bec8df8525..aaa39409eeb7 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -161,7 +161,6 @@ static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output) } shash->tfm = tfm; - shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP; ret = crypto_shash_digest(shash, plaintext, psize, output); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a580999ca57..d892b7c3cc42 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, struct sock *sk = sock->sk; int err = 0; - BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr_len < sizeof(struct sockaddr_sco) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 621146d04c03..e68c715f8d37 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -183,7 +183,6 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m, } desc->tfm = tfm; - desc->flags = 0; /* Swap key and message from LSB to MSB */ swap_buf(k, tmp, 16); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..ba303ee99b9b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a0e369179f6d..45e7f4173bba 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -601,6 +601,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return 0; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1497,6 +1498,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1520,6 +1522,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; @@ -2028,7 +2031,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2040,6 +2044,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 9d34de68571b..22afa566cbce 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 564710f88f93..e88d6641647b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232..7104cf13da84 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index eb15891f8b9f..3cad01ac64e4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + /* rule should have no remaining data after target */ + if (type == EBT_COMPAT_TARGET && size_left) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 9cab80207ced..79eac465ec65 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) } EXPORT_SYMBOL(__ceph_open_session); - int ceph_open_session(struct ceph_client *client) { int ret; @@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client) } EXPORT_SYMBOL(ceph_open_session); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); +} +EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 7e71b0df1fbc..3083988ce729 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -840,6 +840,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { struct ceph_bio_iter *it = &cursor->bio_iter; + struct page *page = bio_iter_page(it->bio, it->iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); @@ -851,7 +852,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, return false; /* no more data */ } - if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && + page == bio_iter_page(it->bio, it->iter))) return false; /* more bytes to process in this segment */ if (!it->iter.bi_size) { @@ -899,6 +901,7 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; + struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); @@ -910,7 +913,8 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, return false; /* no more data */ } - if (!bytes || cursor->bvec_iter.bi_bvec_done) + if (!bytes || (cursor->bvec_iter.bi_bvec_done && + page == bvec_iter_page(bvecs, cursor->bvec_iter))) return false; /* more bytes to process in this segment */ BUG_ON(cursor->last_piece); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 18deb3d889c4..a53e4fbb6319 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, mutex_unlock(&monc->mutex); ret = wait_generic_request(req); + if (!ret) + /* + * Make sure we have the osdmap that includes the blacklist + * entry. This is needed to ensure that the OSDs pick up the + * new blacklist before processing any future requests from + * this client. + */ + ret = ceph_wait_for_latest_osdmap(monc->client, 0); + out: put_generic_request(req); return ret; diff --git a/net/core/datagram.c b/net/core/datagram.c index b2651bb6d2a3..e657289db4ac 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (sk->sk_receive_queue.prev != *last); error = -EAGAIN; diff --git a/net/core/dev.c b/net/core/dev.c index 2b67f2aa59dd..f409406254dd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname) BUG_ON(!dev_net(dev)); net = dev_net(dev); - if (dev->flags & IFF_UP) + + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. + */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; write_seqcount_begin(&devnet_rename_seq); @@ -5014,8 +5028,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, if (pt_prev->list_func != NULL) pt_prev->list_func(head, pt_prev, orig_dev); else - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } } static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) diff --git a/net/core/devlink.c b/net/core/devlink.c index 78e22cea4cc7..da0a29f30885 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3897,6 +3897,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, continue; } + if (!devlink->ops->info_get) { + idx++; + continue; + } + mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b1eb32419732..36ed619faf36 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1797,11 +1797,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) WARN_ON_ONCE(!ret); gstrings.len = ret; - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (gstrings.len && !data) - return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + if (gstrings.len) { + data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); + if (!data) + return -ENOMEM; + + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1897,11 +1902,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) @@ -1941,16 +1950,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - return ret; + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (ret < 0) + goto out; + } else { + ops->get_ethtool_phy_stats(dev, &stats, data); + } } else { - ops->get_ethtool_phy_stats(dev, &stats, data); + data = NULL; } ret = -EFAULT; diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9..b5cd3c727285 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev) goto err_upper_link; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: @@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) diff --git a/net/core/filter.c b/net/core/filter.c index f274620945ff..27e61ffd9039 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; } @@ -4385,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, * Only binding to IP is supported. */ err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; @@ -5266,7 +5266,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCKET, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, @@ -5407,8 +5407,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; @@ -5422,6 +5420,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = { .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; +BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_get_listener_sock_proto = { + .func = bpf_get_listener_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; @@ -5607,6 +5622,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; #endif @@ -5702,6 +5719,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6596,14 +6615,8 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (type == BPF_WRITE) { - switch (off) { - case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): - break; - default: - return false; - } - } + if (type == BPF_WRITE) + return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): @@ -6615,11 +6628,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_till(struct __sk_buff, family, local_port): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + default: return false; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index bb1a54747d64..94a450b2191a 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->n_proto = skb->protocol; flow_keys->nhoff = skb_network_offset(skb); flow_keys->thoff = flow_keys->nhoff; @@ -716,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, + skb_network_offset(skb), skb->len); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->nhoff, skb->len); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4ff661f6f989..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -928,6 +928,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) { @@ -937,7 +939,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1464,6 +1465,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1473,7 +1476,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 17f36317363d..7e6dcc625701 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->count, 1); refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 703cf76aa7c2..7109c168b5e0 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -185,9 +185,10 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog_kern ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; + struct sock_fprog_kern ptp_prog; + + ptp_prog.len = ARRAY_SIZE(ptp_filter); + ptp_prog.filter = ptp_filter; BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64..220c56e93659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2415d9cb9b89..40796b8bf820 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int delta_truesize; struct sk_buff *lp; - if (unlikely(p->len + len >= 65536)) + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } diff --git a/net/core/sock.c b/net/core/sock.c index 782343bb925b..067878a1e4c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); @@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool { struct __kernel_sock_timeval tv; - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f227f002c73d..db87d9f58019 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d5740bad5b18..57d84e9b7b6f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -436,8 +436,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index ed4f6dc26365..85c22ada4744 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -98,8 +98,18 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = QCA_HDR_LEN; + *proto = ((__be16 *)skb->data)[0]; + + return 0; +} + const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, + .flow_dissect = qca_tag_flow_dissect, .overhead = QCA_HDR_LEN, }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 10e809b296ec..fb065a8937ea 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { int encap_type; struct udphdr *uh; @@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru __be16 sport, dport; struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph = esp->esph; + unsigned int len; spin_lock_bh(&x->lock); sport = encap->encap_sport; @@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru encap_type = encap->encap_type; spin_unlock_bh(&x->lock); + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len + sizeof(struct iphdr) >= IP_MAX_MTU) + return -EMSGSIZE; + uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len + esp->tailen - - skb_transport_offset(skb)); + uh->len = htons(len); uh->check = 0; switch (encap_type) { @@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru *skb_mac_header(skb) = IPPROTO_UDP; esp->esph = esph; + + return 0; } int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) @@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) - esp_output_udp_encap(x, skb, esp); + if (x->encap) { + int err = esp_output_udp_encap(x, skb, esp); + + if (err < 0) + return err; + } if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 8756e0e790d2..d3170a8001b2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..12ce6c526d72 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fd219f7bd3ea..4b0526441476 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), @@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, goto drop; if (tunnel->collect_md) { + struct erspan_metadata *pkt_md, *md; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); md = ip_tunnel_info_opts(&tun_dst->u.tun_info); md->version = ver; md2 = &md->u.md2; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ecce2dc78f17..1132d6d1796a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline bool ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt; const struct iphdr *iph; - struct net_device *dev = skb->dev; /* It looks as overkill, because not all IP options require packet mangling. @@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) } } - if (ip_options_rcv_srr(skb)) + if (ip_options_rcv_srr(skb, dev)) goto drop; } @@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, } #endif - if (iph->ihl > 5 && ip_rcv_options(skb)) + if (iph->ihl > 5 && ip_rcv_options(skb, dev)) goto drop; rt = skb_rtable(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 32a35043c9f5..3db31bb9df50 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) } } -int ip_options_rcv_srr(struct sk_buff *skb) +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; @@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c80188875f39..e8bb2e85c5a4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68a21bf75dd0..35d8346742e2 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -646,10 +646,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); @@ -659,9 +657,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); -xfrm_tunnel_failed: xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -676,6 +674,7 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 835d50b279f5..a2a88ab07f7b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -56,7 +56,7 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ - struct rcu_head rcu; /* for call_rcu_bh */ + struct rcu_head rcu; /* for call_rcu */ struct net *net; /* netns for pernet list */ char ifname[IFNAMSIZ]; /* device ifname */ }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2..6fdf1c195d8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1183,11 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } +static void ipv4_send_dest_unreach(struct sk_buff *skb) +{ + struct ip_options opt; + int res; + + /* Recompile ip options since IPCB may not be valid anymore. + * Also check we have a reasonable ipv4 header. + */ + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) + return; + + memset(&opt, 0, sizeof(opt)); + if (ip_hdr(skb)->ihl > 5) { + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) + return; + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) + return; + } + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); +} + static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b18465..eeb4041fa5f9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_autocorking", diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index cd4814f7e962..477cb4aa456c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -67,19 +66,14 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -91,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -123,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -164,21 +145,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } -static void dctcp_state(struct sock *sk, u8 new_state) +static void dctcp_react_to_loss(struct sock *sk) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +static void dctcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -190,6 +173,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e..731d3045b50a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 277d71239d75..a2896944aa37 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : we do not really care in TCP stack, + * at least for 'GRO' packets. + */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; if (TCP_SKB_CB(skb)->has_rxtstamp) { @@ -2578,7 +2589,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; - module_put(net->ipv4.tcp_congestion_control->owner); + if (net->ipv4.tcp_congestion_control) + module_put(net->ipv4.tcp_congestion_control->owner); for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 64f9715173ac..065334b41d57 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,13 +384,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values + * leading to excessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. */ - if (!skb_gro_receive(p, skb) && + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; - else if (uh->len != uh2->len) - pp = p; return pp; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d73a6d6652f6..2b144b92ae46 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -111,7 +111,8 @@ static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; @@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) __be16 *greflags; __be32 *gre_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; greflags = (__be16 *)xprth; gre_hdr = (__be32 *)xprth; @@ -213,10 +219,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d43d076c98f5..1766325423b5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, } if (nlmsg_attrlen(nlh, sizeof(*ifal))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst"); + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request"); return -EINVAL; } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d46b4eb645c2..cb99f6fb79b7 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 79d2e43c05c5..5fc1f4e0c0cf 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) done: rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); return ret; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6613d8dbb0e5..91247a6fc67f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -921,9 +921,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, if (pcpu_rt) { struct fib6_info *from; - from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index cb54a8a3c273..be5f3d7ceb96 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) @@ -633,9 +639,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b32c95f02128..655e46b227f9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) } static int ip6erspan_rcv(struct sk_buff *skb, - struct tnl_ptk_info *tpi) + struct tnl_ptk_info *tpi, + int gre_hdr_len) { struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; const struct ipv6hdr *ipv6h; struct erspan_md2 *md2; struct ip6_tnl *tunnel; @@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)skb->data; - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) return PACKET_REJECT; if (tunnel->parms.collect_md) { + struct erspan_metadata *pkt_md, *md; struct metadata_dst *tun_dst; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; @@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index edbd12067170..e51f3c648b09 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0c6403cf8b52..ade1390c6348 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 1059894a6f4c..4cb83fb69844 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) psidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left + 1) * sizeof(struct in6_addr)); psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid); + if (!psid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID, ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk, &srhinfo->psid_addr))) @@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left - 1) * sizeof(struct in6_addr)); nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid); + if (!nsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID, ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk, &srhinfo->nsid_addr))) @@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (srhinfo->mt_flags & IP6T_SRH_LSID) { lsidoff = srhoff + sizeof(struct ipv6_sr_hdr); lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid); + if (!lsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID, ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk, &srhinfo->lsid_addr))) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4ef4bbdb49d4..0520aca3354b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -379,11 +379,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - rcu_read_lock(); - from = rcu_dereference(rt->from); - rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1040,14 +1037,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) struct rt6_info *nrt; if (!fib6_info_hold_safe(rt)) - return NULL; + goto fallback; nrt = ip6_dst_alloc(dev_net(dev), dev, flags); - if (nrt) - ip6_rt_copy_init(nrt, rt); - else + if (!nrt) { fib6_info_release(rt); + goto fallback; + } + + ip6_rt_copy_init(nrt, rt); + return nrt; +fallback: + nrt = dev_net(dev)->ipv6.ip6_null_entry; + dst_hold(&nrt->dst); return nrt; } @@ -1096,10 +1099,6 @@ restart: dst_hold(&rt->dst); } else { rt = ip6_create_rt_rcu(f6i); - if (!rt) { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); - } } rcu_read_unlock(); @@ -1286,9 +1285,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); @@ -2328,6 +2325,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -3391,11 +3392,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. - */ - fib6_info_hold(from); - rcu_read_unlock(); + if (!from) + goto out; nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) @@ -3407,10 +3405,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; @@ -3423,7 +3418,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: - fib6_info_release(from); + rcu_read_unlock(); neigh_release(neigh); } @@ -3662,23 +3657,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { - int type; struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; + int type; + + if (netif_is_l3_master(skb->dev) && + dst->dev == net->loopback_dev) + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); + else + idev = ip6_dst_idev(dst); + switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), - __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), - ipstats_mib_noroutes); + IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } + + /* Start over by dropping the dst for l3mdev case */ + if (netif_is_l3_master(skb->dev)) + skb_dst_drop(skb); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; @@ -5011,16 +5017,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock(); if (err < 0) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07e21a82ce4c..b2109b74857d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 57ef69a10889..44d431849d39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1110,11 +1110,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b444483cdb2b..622eeaf5732b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk) static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bc65db782bfb..d9e5f6808811 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) unsigned int i; xfrm_flush_gc(); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); @@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c5c5ab6c5a1c..44fdc641710d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -2054,14 +2054,14 @@ static int __init kcm_init(void) if (err) goto fail; - err = sock_register(&kcm_family_ops); - if (err) - goto sock_register_fail; - err = register_pernet_device(&kcm_net_ops); if (err) goto net_ops_fail; + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + err = kcm_proc_init(); if (err) goto proc_init_fail; @@ -2069,12 +2069,12 @@ static int __init kcm_init(void) return 0; proc_init_fail: - unregister_pernet_device(&kcm_net_ops); - -net_ops_fail: sock_unregister(PF_KCM); sock_register_fail: + unregister_pernet_device(&kcm_net_ops); + +net_ops_fail: proto_unregister(&kcm_proto); fail: @@ -2090,8 +2090,8 @@ fail: static void __exit kcm_exit(void) { kcm_proc_exit(); - unregister_pernet_device(&kcm_net_ops); sock_unregister(PF_KCM); + unregister_pernet_device(&kcm_net_ops); proto_unregister(&kcm_proto); destroy_workqueue(kcm_wq); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5651c29cb5bd..4af1e1d60b9f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; + if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) + return -EINVAL; - t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */ + t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fed6becc5daf..52b5a2797c0c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } @@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b99e73a7e7e0..2017b7d780f5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct llc_sap *sap; int rc = -EINVAL; - dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); - lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cff0fb3578c9..deb3faf08337 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -841,7 +841,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) dir = sdata->vif.debugfs_dir; - if (!dir) + if (IS_ERR_OR_NULL(dir)) return; sprintf(buf, "netdev:%s", sdata->name); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 28d022a3eee3..ae4f0be3b393 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1195,6 +1195,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e03c46ac8e4d..c62101857b9b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ - if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC) - ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC; + if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & + IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4a6ff1482a9f..02d2e6f11e93 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1908,6 +1908,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + synchronize_rcu(); if (sdata->dev) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4700718e010f..37e372896230 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } } ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 95eb5064fa91..b76a2aefa9ec 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7f8d93401ce0..bf0b187f994e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1568,7 +1568,15 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { - if (txq_has_queue(sta->sta.txq[tid])) + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi = to_txq_info(txq); + + spin_lock(&local->active_txq_lock[txq->ac]); + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + spin_unlock(&local->active_txq_lock[txq->ac]); + + if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8a49a74c0a37..2e816dd67be7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3221,6 +3221,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3261,6 +3262,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head || skb_is_gso(head)) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3318,6 +3320,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; @@ -3646,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue); struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_txq *ret = NULL; struct txq_info *txqi = NULL; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); begin: txqi = list_first_entry_or_null(&local->active_txqs[ac], struct txq_info, schedule_order); if (!txqi) - return NULL; + goto out; if (txqi->txq.sta) { struct sta_info *sta = container_of(txqi->txq.sta, @@ -3672,24 +3676,30 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) if (txqi->schedule_round == local->schedule_round[ac]) - return NULL; + goto out; list_del_init(&txqi->schedule_order); txqi->schedule_round = local->schedule_round[ac]; - return &txqi->txq; + ret = &txqi->txq; + +out: + spin_unlock_bh(&local->active_txq_lock[ac]); + return ret; } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_return_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool force) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); - lockdep_assert_held(&local->active_txq_lock[txq->ac]); + spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && - (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + (force || !skb_queue_empty(&txqi->frags) || + txqi->tin.backlog_packets)) { /* If airtime accounting is active, always enqueue STAs at the * head of the list to ensure that they only get moved to the * back by the airtime DRR scheduler once they have a negative @@ -3706,20 +3716,10 @@ void ieee80211_return_txq(struct ieee80211_hw *hw, list_add_tail(&txqi->schedule_order, &local->active_txqs[txq->ac]); } -} -EXPORT_SYMBOL(ieee80211_return_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - - spin_lock_bh(&local->active_txq_lock[txq->ac]); - ieee80211_return_txq(hw, txq); spin_unlock_bh(&local->active_txq_lock[txq->ac]); } -EXPORT_SYMBOL(ieee80211_schedule_txq); +EXPORT_SYMBOL(__ieee80211_schedule_txq); bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) @@ -3729,7 +3729,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct sta_info *sta; u8 ac = txq->ac; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); if (!txqi->txq.sta) goto out; @@ -3759,34 +3759,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, sta->airtime[ac].deficit += sta->airtime_weight; list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + spin_unlock_bh(&local->active_txq_lock[ac]); return false; out: if (!list_empty(&txqi->schedule_order)) list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[ac]); return true; } EXPORT_SYMBOL(ieee80211_txq_may_transmit); void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock) { struct ieee80211_local *local = hw_to_local(hw); spin_lock_bh(&local->active_txq_lock[ac]); local->schedule_round[ac]++; -} -EXPORT_SYMBOL(ieee80211_txq_schedule_start); - -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - spin_unlock_bh(&local->active_txq_lock[ac]); } -EXPORT_SYMBOL(ieee80211_txq_schedule_end); +EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index dda8930f20e7..f3a8557494d6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -140,9 +140,15 @@ static int mpls_xmit(struct sk_buff *skb) if (rt) err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, skb); - else if (rt6) - err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, - skb); + else if (rt6) { + if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { + /* 6PE (RFC 4798) */ + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3], + skb); + } else + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, + skb); + } if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 5d782445d2fc..bad17bba8ba7 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -251,6 +251,10 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, } attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + if (!attr) { + rc = -EMSGSIZE; + goto err; + } rc = ncsi_write_package_info(skb, ndp, package->id); if (rc) { nla_nest_cancel(skb, attr); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index dc07fcc7938e..802db01e3075 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/skbuff.h> #include <net/ncsi.h> @@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); /* Increase mac address by 1 for BMC's address */ - saddr.sa_data[ETH_ALEN - 1]++; + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + ret = ops->ndo_set_mac_address(ndev, &saddr); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d43ffb09939b..6548271209a0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1007,6 +1007,7 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 43bbaa32b1d6..14457551bcb4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 82bfbeef46af..2a714527cde1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> @@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { @@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); - if (tstamp) { - if (skb->tstamp == 0) - __net_timestamp(skb); + if (tstamp) + tstamp->start = ktime_get_real_ns(); - tstamp->start = ktime_to_ns(skb->tstamp); - } /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above @@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; + ct->timeout = 0; write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5..d7f61b0547c6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/siphash.h> #include <linux/netfilter.h> #include <net/netlink.h> @@ -485,7 +486,9 @@ nla_put_failure: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2692,6 +2696,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b9403a266a2e..37bb530d848f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, struct va_format vaf; va_list args; - if (net->ct.sysctl_log_invalid != protonum || + if (net->ct.sysctl_log_invalid != protonum && net->ct.sysctl_log_invalid != IPPROTO_RAW) return; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 7df477996b16..9becac953587 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -static int -icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, - const struct nf_hook_state *state) +/* Check inner header is related to any of the existing connections */ +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, union nf_inet_addr *outer_daddr) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; + union nf_inet_addr *ct_daddr; + enum ip_conntrack_dir dir; + struct nf_conn *ct; WARN_ON(skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) + ip_hdrlen(skb) - + sizeof(struct icmphdr), - PF_INET, state->net, &origtuple)) { - pr_debug("icmp_error_message: failed to get tuple\n"); + if (!nf_ct_get_tuplepr(skb, dataoff, + state->pf, state->net, &origtuple)) return -NF_ACCEPT; - } /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { - pr_debug("icmp_error_message: no match\n"); + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(state->net, zone, &innertuple); - if (!h) { - pr_debug("icmp_error_message: no match\n"); + if (!h) + return -NF_ACCEPT; + + /* Consider: A -> T (=This machine) -> B + * Conntrack entry will look like this: + * Original: A->B + * Reply: B->T (SNAT case) OR A + * + * When this function runs, we got packet that looks like this: + * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..). + * + * Above nf_conntrack_find_get() makes lookup based on inner_hdr, + * so we should expect that destination of the found connection + * matches outer header destination address. + * + * In above example, we can consider these two cases: + * 1. Error coming in reply direction from B or M (middle box) to + * T (SNAT case) or A. + * Inner saddr will be B, dst will be T or A. + * The found conntrack will be reply tuple (B->T/A). + * 2. Error coming in original direction from A or M to B. + * Inner saddr will be A, inner daddr will be B. + * The found conntrack will be original tuple (A->B). + * + * In both cases, conntrack[dir].dst == inner.dst. + * + * A bogus packet could look like this: + * Inner: B->T + * Outer: B->X (other machine reachable by T). + * + * In this case, lookup yields connection A->B and will + * set packet from B->X as *RELATED*, even though no connection + * from X was ever seen. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + dir = NF_CT_DIRECTION(h); + ct_daddr = &ct->tuplehash[dir].tuple.dst.u3; + if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) { + if (state->pf == AF_INET) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI4 != inner %pI4", + &outer_daddr->ip, &ct_daddr->ip); + } else if (state->pf == AF_INET6) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI6 != inner %pI6", + &outer_daddr->ip6, &ct_daddr->ip6); + } + nf_ct_put(ct); return -NF_ACCEPT; } - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo = IP_CT_RELATED; + if (dir == IP_CT_DIR_REPLY) ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); + nf_ct_set(skb, ct, ctinfo); return NF_ACCEPT; } @@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmphdr *icmph; struct icmphdr _ih; /* Not enough header? */ - icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); + icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmph == NULL) { icmp_error_log(skb, state, "short packet"); return -NF_ACCEPT; @@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(tmpl, skb, state); + memset(&outer_daddr, 0, sizeof(outer_daddr)); + outer_daddr.ip = ip_hdr(skb)->daddr; + + dataoff += sizeof(*icmph); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMP, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index bec4a3211658..c63ee3612855 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct, return NF_ACCEPT; } -static int -icmpv6_error_message(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int icmp6off) -{ - struct nf_conntrack_tuple intuple, origtuple; - const struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; - struct nf_conntrack_zone tmp; - - WARN_ON(skb_nfct(skb)); - - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) - + sizeof(struct ipv6hdr) - + sizeof(struct icmp6hdr), - PF_INET6, net, &origtuple)) { - pr_debug("icmpv6_error: Can't get tuple\n"); - return -NF_ACCEPT; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&intuple, &origtuple)) { - pr_debug("icmpv6_error: Can't invert tuple\n"); - return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; - - h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), - &intuple); - if (!h) { - pr_debug("icmpv6_error: no match\n"); - return -NF_ACCEPT; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); - return NF_ACCEPT; -} static void icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, @@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; @@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(state->net, tmpl, skb, dataoff); + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += sizeof(*icmp6h); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f067c6b50857..39fcc1ed18f3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -20,9 +20,9 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> -#include <net/route.h> -#include <net/ip6_route.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> @@ -871,38 +871,33 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, } else if (sip_external_media) { struct net_device *dev = skb_dst(skb)->dev; struct net *net = dev_net(dev); - struct rtable *rt; - struct flowi4 fl4 = {}; -#if IS_ENABLED(CONFIG_IPV6) - struct flowi6 fl6 = {}; -#endif + struct flowi fl; struct dst_entry *dst = NULL; + memset(&fl, 0, sizeof(fl)); + switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: - fl4.daddr = daddr->ip; - rt = ip_route_output_key(net, &fl4); - if (!IS_ERR(rt)) - dst = &rt->dst; + fl.u.ip4.daddr = daddr->ip; + nf_ip_route(net, &dst, &fl, false); break; -#if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: - fl6.daddr = daddr->in6; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - dst = NULL; - } + fl.u.ip6.daddr = daddr->in6; + nf_ip6_route(net, &dst, &fl, false); break; -#endif } /* Don't predict any conntracks when media endpoint is reachable * through the same interface as the signalling peer. */ - if (dst && dst->dev == dev) - return NF_ACCEPT; + if (dst) { + bool external_media = (dst->dev == dev); + + dst_release(dst); + if (external_media) + return NF_ACCEPT; + } } /* We need to check whether the registration exists before attempting diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc6537758..000952719adf 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; - min = range->min_proto.icmp.id; - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 0; + range_size = 65536; + } else { + min = ntohs(range->min_proto.icmp.id); + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + } goto find_free_id; #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) case IPPROTO_GRE: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 513f93118604..1606eaa5ae0d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && @@ -2806,8 +2806,11 @@ err2: nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { - if (info[i].ops != NULL) + if (info[i].ops) { module_put(info[i].ops->type->owner); + if (info[i].ops->type->release_ops) + info[i].ops->type->release_ops(info[i].ops); + } } kvfree(info); return err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1f9c5303f02..0b3347570265 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (skb->tstamp) { + if (hooknum <= NF_INET_FORWARD && skb->tstamp) { struct nfulnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0dcc3592d053..e057b2961d31 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 457a9ceb46af..8dfa798ea683 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -65,21 +65,34 @@ nla_put_failure: return -1; } -static void nft_objref_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_objref_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_object *obj = nft_objref_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + obj->use--; } +static void nft_objref_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use++; +} + static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), .eval = nft_objref_eval, .init = nft_objref_init, - .destroy = nft_objref_destroy, + .activate = nft_objref_activate, + .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, }; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index f8092926f704..a340cd8a751b 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -233,5 +233,5 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fa61208371f8..321a0036fdf5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net, else if (d > 0) parent = parent->rb_right; else { - if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; - continue; - } if (nft_rbtree_interval_end(rbe) && !nft_rbtree_interval_end(this)) { parent = parent->rb_left; @@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; } nft_rbtree_flush(net, set, rbe); return rbe; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c13bcd0ab491..8dbb4d48f2ed 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) s64 stamp; /* - * We cannot use get_seconds() instead of __net_timestamp() here. + * We need real time here, but we can neither use skb->tstamp + * nor __net_timestamp(). + * + * skb->tstamp and skb->skb_mstamp_ns overlap, however, they + * use different clock types (real vs monotonic). + * * Suppose you have two rules: - * 1. match before 13:00 - * 2. match after 13:00 + * 1. match before 13:00 + * 2. match after 13:00 + * * If you match against processing time (get_seconds) it * may happen that the same packet matches both rules if - * it arrived at the right moment before 13:00. + * it arrived at the right moment before 13:00, so it would be + * better to check skb->tstamp and set it via __net_timestamp() + * if needed. This however breaks outgoing packets tx timestamp, + * and causes them to get delayed forever by fq packet scheduler. */ - if (skb->tstamp == 0) - __net_timestamp((struct sk_buff *)skb); - - stamp = ktime_to_ns(skb->tstamp); - stamp = div_s64(stamp, NSEC_PER_SEC); + stamp = get_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f28e937320a3..216ab915dd54 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; - unsigned long groups = nladdr->nl_groups; + unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) @@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_family != AF_NETLINK) return -EINVAL; + groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 25eeb6d2a75a..cb69d35c8e6a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -362,11 +362,11 @@ int genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; - family->id = idr_alloc(&genl_fam_idr, family, - start, end + 1, GFP_KERNEL); + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_free; } err = genl_validate_assign_mc_groups(family); @@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_free: kfree(family->attrbuf); errout_locked: genl_unlock_all(); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d19903..71ffd1a6dc7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a9647..93d13f019981 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f0..b76aa668a94b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f18..771011b84270 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ae296273ce3d..17dcd0b5eb32 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -726,6 +726,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->service_name = kmemdup(addr->service_name, llcp_sock->service_name_len, GFP_KERNEL); + if (!llcp_sock->service_name) { + ret = -ENOMEM; + goto sock_llcp_release; + } nfc_llcp_sock_link(&local->connecting_sockets, sk); @@ -745,10 +749,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, return ret; sock_unlink: - nfc_llcp_put_ssap(local, llcp_sock->ssap); - nfc_llcp_sock_unlink(&local->connecting_sockets, sk); +sock_llcp_release: + nfc_llcp_put_ssap(local, llcp_sock->ssap); + put_dev: nfc_put_device(dev); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6679e96ab1dc..9dd158ab51b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -448,6 +448,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); + if (!upcall) { + err = -EINVAL; + goto out; + } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); @@ -460,6 +464,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); BUG_ON(err); @@ -468,6 +476,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->actions_len) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_actions(upcall_info->actions, upcall_info->actions_len, user_skb); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 691da853bef5..4bdf5e3ac208 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8376bc1c1508..9b81813dd16a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1852,7 +1852,8 @@ oom: static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { - if (!skb->protocol && sock->type == SOCK_RAW) { + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && + sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } @@ -2601,8 +2602,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@ -2613,7 +2614,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2623,10 +2623,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2798,7 +2801,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2815,7 +2818,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2823,10 +2825,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -3243,7 +3248,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); @@ -3343,20 +3348,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { @@ -4209,7 +4223,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index d6cc97fbbbb0..2b969f99ef13 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c848..0f4398e7f2a7 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 31cf37da4510..93c0437e6a5f 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) else pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } + ibmr = rds_ib_try_reuse_ibmr(pool); if (ibmr) return ibmr; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 63c8d107adcf..d664e9ade74d 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) struct rds_ib_mr *ibmr = NULL; int iter = 0; - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 70559854837e..8946c89d7392 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, unsigned long frag_off; unsigned long to_copy; unsigned long copied; - uint64_t uncongested = 0; + __le64 uncongested = 0; void *addr; /* catch completely corrupt packets */ @@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, copied = 0; while (copied < RDS_CONG_MAP_BYTES) { - uint64_t *src, *dst; + __le64 *src, *dst; unsigned int k; to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); @@ -824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, } /* the congestion map is in little endian order */ - uncongested = le64_to_cpu(uncongested); - - rds_cong_map_updated(map, uncongested); + rds_cong_map_updated(map, le64_to_cpu(uncongested)); } static void rds_ib_process_recv(struct rds_connection *conn, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fd2694174607..faf726e00e27 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -608,7 +608,7 @@ static void rds_tcp_kill_sock(struct net *net) list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); - if (net != c_net || !tc->t_sock) + if (net != c_net) continue; if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7af4f99c4a93..094a6621f8e8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,6 +16,7 @@ #include <linux/init.h> static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); @@ -35,29 +36,27 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; + struct sk_buff *skbn = NULL; - skbn = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } static void rose_loopback_timer(struct timer_list *unused) @@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 96f2952bbdfd..ae8c5d7f3bf1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - u16 service_id = srx->srx_service; + u16 service_id; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) ret = rxrpc_validate_address(rx, srx, len); if (ret < 0) goto error; + service_id = srx->srx_service; lock_sock(&rx->sk); @@ -370,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check + * @_life: Where to store the life value * * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Returns a number representing the life state - * which can be compared to that returned by a previous call. + * getting ACKs from the server. Passes back in *_life a number representing + * the life state which can be compared to that returned by a previous call and + * return true if the call is still alive. * * If the life state stalls, rxrpc_kernel_probe_life() should be called and * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call) +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call, + u32 *_life) { - return call->acks_latest; + *_life = call->acks_latest; + return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4b1a534d290a..062ca9dc29b8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -654,6 +654,7 @@ struct rxrpc_call { u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8aa2937b069f..fe96881a334d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); - atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b6fca8ebb117..8d31fb4c51e1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl) + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) { struct rxrpc_call *call; int i; @@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call->call_id, 0, conn->abort_code, conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); if (rxrpc_set_call_completion(call, compl, conn->abort_code, conn->error)) @@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); - msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; @@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); @@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aa..c2c35cf4e308 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u8 acks[RXRPC_MAXACKS]; } buf; rxrpc_serial_t acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, - first_soft_ack, ntohl(buf.ack.previousPacket), + first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) return; buf.info.rxMTU = 0; @@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, spin_lock(&call->input_lock); - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (inside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; + call->ackr_first_seq = first_soft_ack; + call->ackr_prev_seq = prev_pkt; + /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); @@ -1155,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * handle data received on the local endpoint * - may be called in interrupt context * - * The socket is locked by the caller and this prevents the socket from being - * shut down and the local endpoint from going away, thus sk_user_data will not - * be cleared until this function returns. + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. * * Called with the RCU read lock held from the IP layer via UDP. */ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); struct rxrpc_connection *conn; struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; @@ -1175,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) _enter("%p", udp_sk); + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 15cf42d5b53a..01959db51445 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -304,7 +304,8 @@ nomem: ret = -ENOMEM; sock_error: mutex_unlock(&rxnet->local_mutex); - kfree(local); + if (local) + call_rcu(&local->rcu, rxrpc_local_rcu); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 736aa9281100..004c762c2e8d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -335,7 +335,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -393,14 +392,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, static int lose; if ((lose++ & 7) == 7) { ret = 0; - lost = true; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, retrans, true); + goto done; } } - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, - retrans, lost); - if (lost) - goto done; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, + false); /* send the packet with the don't fragment bit set if we currently * think it's small enough */ diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc05af89fc38..6e84d878053c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. + */ + sock_error(sk); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 46c9312085b1..bec64deb7b0a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, } /* - * Queue a DATA packet for transmission, set the resend timeout and send the - * packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. */ -static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; @@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(""); + _leave(" = %d", ret); + return ret; } /* @@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ skb = NULL; } } while (msg_data_left(msg) > 0); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1b9afdee5ba9..5c02ad97ef23 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -358,8 +358,7 @@ config NET_SCH_PIE help Say Y here if you want to use the Proportional Integral controller Enhanced scheduler packet scheduling algorithm. - For more information, please see - http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + For more information, please see https://tools.ietf.org/html/rfc8033 To compile this driver as a module, choose M here: the module will be called sch_pie. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aecf1bf233c8..5a87e271d35a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -28,27 +28,10 @@ #include <net/act_api.h> #include <net/netlink.h> -static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) -{ - u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; - - if (!tp) - return -EINVAL; - a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index); - if (!a->goto_chain) - return -ENOMEM; - return 0; -} - -static void tcf_action_goto_chain_fini(struct tc_action *a) -{ - tcf_chain_put_by_act(a->goto_chain); -} - static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { - const struct tcf_chain *chain = a->goto_chain; + const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } @@ -71,6 +54,51 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, call_rcu(&old->rcu, tcf_free_cookie_rcu); } +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **newchain, + struct netlink_ext_ack *extack) +{ + int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; + u32 chain_index; + + if (!opcode) + ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; + else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) + ret = 0; + if (ret) { + NL_SET_ERR_MSG(extack, "invalid control action"); + goto end; + } + + if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { + chain_index = action & TC_ACT_EXT_VAL_MASK; + if (!tp || !newchain) { + ret = -EINVAL; + NL_SET_ERR_MSG(extack, + "can't goto NULL proto/chain"); + goto end; + } + *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); + if (!*newchain) { + ret = -ENOMEM; + NL_SET_ERR_MSG(extack, + "can't allocate goto_chain"); + } + } +end: + return ret; +} +EXPORT_SYMBOL(tcf_action_check_ctrlact); + +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *goto_chain) +{ + a->tcfa_action = action; + rcu_swap_protected(a->goto_chain, goto_chain, 1); + return goto_chain; +} +EXPORT_SYMBOL(tcf_action_set_ctrlact); + /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected @@ -78,13 +106,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, */ static void free_tcf(struct tc_action *p) { + struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); + free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->act_cookie, NULL); - if (p->goto_chain) - tcf_action_goto_chain_fini(p); + if (chain) + tcf_chain_put_by_act(chain); kfree(p); } @@ -654,6 +684,10 @@ repeat: return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + if (unlikely(!rcu_access_pointer(a->goto_chain))) { + net_warn_ratelimited("can't go to NULL chain!\n"); + return TC_ACT_SHOT; + } tcf_action_goto_chain_exec(a, res); } @@ -800,15 +834,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -static bool tcf_action_valid(int action) -{ - int opcode = TC_ACT_EXT_OPCODE(action); - - if (!opcode) - return action <= TC_ACT_VALUE_MAX; - return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC; -} - struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -890,10 +915,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, extack); + rtnl_held, tp, extack); else err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - extack); + tp, extack); if (err < 0) goto err_mod; @@ -907,18 +932,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { - err = tcf_action_goto_chain_init(a, tp); - if (err) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); - return ERR_PTR(err); - } - } - - if (!tcf_action_valid(a->tcfa_action)) { + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !rcu_access_pointer(a->goto_chain)) { tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Invalid control action value"); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index aa5c38d11a30..3841156aa09f 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -17,6 +17,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> @@ -278,10 +279,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int replace, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; @@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return ret; } + ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (ret < 0) + goto release_idr; + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { ret = -EINVAL; - goto out; + goto put_chain; } memset(&cfg, 0, sizeof(cfg)); @@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) - goto out; + goto put_chain; prog = to_bpf(*act); @@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; - prog->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (res == ACT_P_CREATED) { tcf_idr_insert(tn, *act); } else { @@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } return res; -out: - tcf_idr_release(*act, bind); +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + +release_idr: + tcf_idr_release(*act, bind); return ret; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5d24993cccfe..32ae0cd6e31c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -21,6 +21,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_connmark.h> #include <net/tc_act/tc_connmark.h> @@ -97,13 +98,15 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; - int ret = 0; + int ret = 0, err; if (!nla) return -EINVAL; @@ -128,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } ci = to_connmark(*a); - ci->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->net = net; ci->zone = parm->zone; @@ -142,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; /* replacing action and zone */ spin_lock_bh(&ci->tcf_lock); - ci->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->zone = parm->zone; spin_unlock_bh(&ci->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); ret = 0; } return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index c79aca29505e..0c77e7bdf6d5 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -33,6 +33,7 @@ #include <net/sctp/checksum.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> @@ -46,12 +47,13 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; @@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); @@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } /** diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 93da0004e9f4..e540e31069d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> @@ -57,10 +58,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; @@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; gact = to_gact(*a); spin_lock_bh(&gact->tcf_lock); - gact->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; @@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9b1f2b3990ee..31c6ffb6abe7 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -29,6 +29,7 @@ #include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> #include <linux/etherdevice.h> @@ -469,11 +470,12 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; @@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { @@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (tb[TCA_IFE_METALST]) { err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, NULL); - if (err) { -metadata_parse_err: - tcf_idr_release(*a, bind); - kfree(p); - return err; - } - + if (err) + goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; @@ -581,21 +582,20 @@ metadata_parse_err: * going to bail out */ err = use_all_metadata(ife, exists); - if (err) { - tcf_idr_release(*a, bind); - kfree(p); - return err; - } + if (err) + goto metadata_parse_err; } if (exists) spin_lock_bh(&ife->tcf_lock); - ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(ife->params, p, 1); if (exists) spin_unlock_bh(&ife->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); @@ -603,6 +603,13 @@ metadata_parse_err: tcf_idr_insert(tn, *a); return ret; +metadata_parse_err: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + kfree(p); + tcf_idr_release(*a, bind); + return err; } static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 98f5b6ea77b4..04a0b5c61194 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind) + const struct tc_action_ops *ops, int ovr, int bind, + struct tcf_proto *tp) { struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -205,20 +206,20 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind); + bind, tp); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, + int bind, bool unlocked, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind); + bind, tp); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6692fd054617..17cc6bd4c57c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -94,10 +94,12 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; @@ -157,18 +159,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + m = to_mirred(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&m->tcfm_list); + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; spin_lock_bh(&m->tcf_lock); - m->tcf_action = parm->action; - m->tcfm_eaction = parm->eaction; if (parm->ifindex) { dev = dev_get_by_index(net, parm->ifindex); if (!dev) { spin_unlock_bh(&m->tcf_lock); - tcf_idr_release(*a, bind); - return -ENODEV; + err = -ENODEV; + goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(dev); rcu_swap_protected(m->tcfm_dev, dev, @@ -177,7 +184,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); @@ -188,6 +199,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 543eab9193f1..e91bb8eb81ec 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include <linux/tc_act/tc_nat.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <net/icmp.h> #include <net/ip.h> #include <net/netlink.h> @@ -38,10 +39,12 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct netlink_ext_ack *extack) + bool rtnl_held, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; @@ -76,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else { return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; p = to_tcf_nat(*a); spin_lock_bh(&p->tcf_lock); @@ -84,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, p->mask = parm->mask; p->flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index a80373878df7..287793abfaf9 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -23,6 +23,7 @@ #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> +#include <net/pkt_cls.h> static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -138,10 +139,11 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; struct tcf_pedit_key_ex *keys_ex; struct tc_pedit *parm; @@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + goto out_release; + } p = to_pedit(*a); spin_lock_bh(&p->tcf_lock); @@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!keys) { spin_unlock_bh(&p->tcf_lock); ret = -ENOMEM; - goto out_release; + goto put_chain; } kfree(p->tcfp_keys); p->tcfp_keys = keys; @@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); kfree(p->tcfp_keys_ex); p->tcfp_keys_ex = keys_ex; spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); out_release: tcf_idr_release(*a, bind); out_free: diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8271a6263824..2b8581f6ab51 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <net/act_api.h> #include <net/netlink.h> +#include <net/pkt_cls.h> struct tcf_police_params { int tcfp_result; @@ -83,10 +84,12 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; struct nlattr *tb[TCA_POLICE_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; @@ -128,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; police = to_police(*a); if (parm->rate.rate) { @@ -213,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new->peak_present) police->tcfp_ptoks = new->tcfp_mtu_ptoks; spin_unlock_bh(&police->tcfp_lock); - police->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(police->params, new, lockdep_is_held(&police->tcf_lock)); spin_unlock_bh(&police->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (new) kfree_rcu(new, rcu); @@ -229,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: tcf_idr_release(*a, bind); return err; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 203e399e5c85..0f82d50ea232 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -22,6 +22,7 @@ #include <linux/tc_act/tc_sample.h> #include <net/tc_act/tc_sample.h> #include <net/psample.h> +#include <net/pkt_cls.h> #include <linux/if_arp.h> @@ -37,14 +38,15 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; + struct tcf_chain *goto_ch = NULL; + u32 psample_group_num, rate; struct tc_sample *parm; - u32 psample_group_num; struct tcf_sample *s; bool exists = false; int ret, err; @@ -79,19 +81,28 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + NL_SET_ERR_MSG(extack, "invalid sample rate"); + err = -EINVAL; + goto put_chain; + } psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } s = to_sample(*a); spin_lock_bh(&s->tcf_lock); - s->tcf_action = parm->action; - s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + s->rate = rate; s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); @@ -100,10 +111,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } spin_unlock_bh(&s->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_sample_cleanup(struct tc_action *a) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index d54cb608dbaf..23c8ca5615e5 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,6 +18,7 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -60,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) return 0; } -static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata, - struct tc_defact *p) +static int reset_policy(struct tc_action *a, const struct nlattr *defdata, + struct tc_defact *p, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { + struct tcf_chain *goto_ch = NULL; + struct tcf_defact *d; + int err; + + err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack); + if (err < 0) + return err; + d = to_defact(a); spin_lock_bh(&d->tcf_lock); - d->tcf_action = p->action; + goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + return 0; } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { @@ -78,10 +91,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; struct tcf_defact *d; bool exists = false; @@ -122,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } d = to_defact(*a); - ret = alloc_defdata(d, tb[TCA_DEF_DATA]); - if (ret < 0) { - tcf_idr_release(*a, bind); - return ret; - } - d->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + + err = alloc_defdata(d, tb[TCA_DEF_DATA]); + if (err < 0) + goto put_chain; + + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - d = to_defact(*a); - if (!ovr) { - tcf_idr_release(*a, bind); - return -EEXIST; + err = -EEXIST; + goto release_idr; } - reset_policy(d, tb[TCA_DEF_DATA], parm); + err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack); + if (err) + goto release_idr; } if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 65879500b688..7e1d261a31d2 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -26,6 +26,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/dsfield.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> @@ -96,11 +97,13 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; @@ -186,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return -EEXIST; } } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->flags = flags; @@ -208,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->mask = *mask; spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(d->params, params_new, lockdep_is_held(&d->tcf_lock)); spin_unlock_bh(&d->tcf_lock); if (params_new) kfree_rcu(params_new, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 7bac1d78e7a3..1d4c324d0a42 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -16,6 +16,7 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> @@ -82,11 +83,13 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; + struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; struct tcf_skbmod *d; bool exists = false; @@ -153,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->flags = lflags; - d->tcf_action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) @@ -183,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (p_old) kfree_rcu(p_old, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_skbmod_cleanup(struct tc_action *a) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 7c6591b991d5..d5aaf90a3971 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -17,6 +17,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> @@ -210,12 +211,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; + struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; @@ -359,6 +362,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, goto release_tun_meta; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + exists = true; + goto release_tun_meta; + } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); @@ -366,23 +375,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_tun_meta; + goto put_chain; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; spin_lock_bh(&t->tcf_lock); - t->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); tunnel_key_release_params(params_new); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + release_tun_meta: if (metadata) dst_release(&metadata->dst); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ac0061599225..0f40d0a74423 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -15,6 +15,7 @@ #include <linux/if_vlan.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> @@ -105,10 +106,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; @@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + v = to_vlan(*a); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->tcfv_action = action; @@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, p->tcfv_push_proto = push_proto; spin_lock_bh(&v->tcf_lock); - v->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); spin_unlock_bh(&v->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_vlan_cleanup(struct tc_action *a) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc10525e90e7..99ae30c177c7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); - kfree(chain); + kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 459921bd3d87..a13bc351a414 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -130,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 1d2a12132abc..259d97bc2abd 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -211,6 +211,9 @@ struct cake_sched_data { u8 ack_filter; u8 atm_mode; + u32 fwmark_mask; + u16 fwmark_shft; + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ u16 rate_shft; ktime_t time_next_packet; @@ -258,8 +261,7 @@ enum { CAKE_FLAG_AUTORATE_INGRESS = BIT(1), CAKE_FLAG_INGRESS = BIT(2), CAKE_FLAG_WASH = BIT(3), - CAKE_FLAG_SPLIT_GSO = BIT(4), - CAKE_FLAG_FWMARK = BIT(5) + CAKE_FLAG_SPLIT_GSO = BIT(4) }; /* COBALT operates the Codel and BLUE algorithms in parallel, in order to @@ -1515,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); @@ -1543,7 +1556,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, struct sk_buff *skb) { struct cake_sched_data *q = qdisc_priv(sch); - u32 tin; + u32 tin, mark; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding @@ -1551,14 +1564,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, */ dscp = cake_handle_diffserv(skb, q->rate_flags & CAKE_FLAG_WASH); + mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; - else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */ - skb->mark && - skb->mark <= q->tin_cnt) - tin = q->tin_order[skb->mark - 1]; + else if (mark && mark <= q->tin_cnt) + tin = q->tin_order[mark - 1]; else if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && @@ -2172,6 +2184,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { [TCA_CAKE_MPU] = { .type = NLA_U32 }, [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, + [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, }; static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, @@ -2619,10 +2632,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_FWMARK]) { - if (!!nla_get_u32(tb[TCA_CAKE_FWMARK])) - q->rate_flags |= CAKE_FLAG_FWMARK; - else - q->rate_flags &= ~CAKE_FLAG_FWMARK; + q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); + q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; } if (q->tins) { @@ -2784,8 +2795,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FWMARK, - !!(q->rate_flags & CAKE_FLAG_FWMARK))) + if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4dc05409e3fb..114b9048ea7e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; + __u32 qlen; cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; + qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; @@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); @@ -1665,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); - qlen = cl->q->q.qlen; - backlog = cl->q->qstats.backlog; - qdisc_reset(cl->q); - qdisc_tree_reduce_backlog(cl->q, qlen, backlog); + qdisc_purge_queue(cl->q); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 09b800991065..430df9a55ec4 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct drr_class, common); } -static void drr_purge_queue(struct drr_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, }; @@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - drr_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -269,7 +260,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct drr_class *cl = (struct drr_class *)arg; - __u32 qlen = cl->qdisc->q.qlen; + __u32 qlen = qdisc_qlen_sum(cl->qdisc); + struct Qdisc *cl_q = cl->qdisc; struct tc_drr_stats xstats; memset(&xstats, 0, sizeof(xstats)); @@ -279,7 +271,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) + gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 24cc220a3218..d2ab463f22ae 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -845,16 +845,6 @@ qdisc_peek_len(struct Qdisc *sch) } static void -hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - -static void hfsc_adjust_levels(struct hfsc_class *cl) { struct hfsc_class *p; @@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) - hfsc_purge_queue(sch, parent); + qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); @@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); - hfsc_purge_queue(sch, cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); @@ -1328,8 +1318,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; + __u32 qlen; - cl->qstats.backlog = cl->qdisc->qstats.backlog; + qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1337,7 +1328,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 30f9da7e1076..2f9883b196e8 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->leaf.q) { - qlen = cl->leaf.q->q.qlen; - qs.backlog = cl->leaf.q->qstats.backlog; - } + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), @@ -1270,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - if (!cl->level) { - unsigned int qlen = cl->leaf.q->q.qlen; - unsigned int backlog = cl->leaf.q->qstats.backlog; - - qdisc_reset(cl->leaf.q); - qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); - } + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); @@ -1404,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->leaf.q->q.qlen; - unsigned int backlog = parent->leaf.q->qstats.backlog; - /* turn parent into inner node */ - qdisc_reset(parent->leaf.q); - qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_purge_queue(parent->leaf.q); qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 203659bc3906..3a3312467692 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d364e63c396d..ea0dc112b38d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, - &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; } return 0; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7410ce4d0321..35b03ae08e0f 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); + qdisc_tree_flush_backlog(child); qdisc_put(child); } } @@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); + qdisc_tree_flush_backlog(old); qdisc_put(old); } sch_tree_unlock(sch); @@ -344,7 +342,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 847141cd900f..d519b21535b3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < oldbands; i++) { - struct Qdisc *child = q->queues[i]; - - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); - } + for (i = q->bands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; @@ -365,7 +361,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 29f5c4a24688..1589364b54da 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, [TCA_QFQ_LMAX] = { .type = NLA_U32 }, @@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -655,8 +646,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, - &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9df9942340ea..4e8c0abf6194 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bab506b01a32..2419fdb75966 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); sch_tree_lock(sch); - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 206e4dbed12f..c7041999eb5d 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7f272a9070c5..f71578dbb9e3 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 39d72e58b8e5..31569f4809f6 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -760,7 +760,6 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, SHASH_DESC_ON_STACK(desc, tfm); desc->tfm = tfm; - desc->flags = 0; crypto_shash_digest(desc, (u8 *)auth, end - (unsigned char *)auth, digest); shash_desc_zero(desc); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6abc8b274270..951afdeea5e9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -600,6 +600,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d05c57664e36..72e74503f9fc 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1684,7 +1684,6 @@ static struct sctp_cookie_param *sctp_pack_cookie( /* Sign the message. */ desc->tfm = sctp_sk(ep->base.sk)->hmac; - desc->flags = 0; err = crypto_shash_setkey(desc->tfm, ep->secret_key, sizeof(ep->secret_key)) ?: @@ -1755,7 +1754,6 @@ struct sctp_association *sctp_unpack_cookie( int err; desc->tfm = sctp_sk(ep->base.sk)->hmac; - desc->flags = 0; err = crypto_shash_setkey(desc->tfm, ep->secret_key, sizeof(ep->secret_key)) ?: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1d143bc3f73d..4aa03588f87b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. - */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c9ae3404b1bb..713a669d2058 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6140471efd4b..4583fa914e62 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -999,7 +999,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1007,7 +1007,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, addr_buf = kaddrs; while (walk_size < addrs_size) { if (walk_size + sizeof(sa_family_t) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } @@ -1018,7 +1018,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, * causes the address buffer to overflow return EINVAL. */ if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } addrcnt++; @@ -1054,7 +1054,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, } out: - kvfree(kaddrs); + kfree(kaddrs); return err; } @@ -1329,7 +1329,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1349,7 +1349,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); out_free: - kvfree(kaddrs); + kfree(kaddrs); return err; } @@ -2920,6 +2920,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.sack_assoc_id = SCTP_FUTURE_ASSOC; + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || params.sack_assoc_id == SCTP_ALL_ASSOC) { if (params.sack_delay) { @@ -3024,6 +3027,9 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.sinfo_assoc_id = SCTP_FUTURE_ASSOC; + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; @@ -3081,6 +3087,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.snd_assoc_id = SCTP_FUTURE_ASSOC; + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; @@ -3531,6 +3540,9 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_rcv_context = params.assoc_value; @@ -3670,6 +3682,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->max_burst = params.assoc_value; @@ -3798,6 +3813,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + authkey->sca_assoc_id = SCTP_FUTURE_ASSOC; + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || authkey->sca_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_key(ep, asoc, authkey); @@ -3853,6 +3871,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (asoc) return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); @@ -3904,6 +3925,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (asoc) return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); @@ -3954,6 +3978,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, if (asoc) return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); @@ -4169,6 +4196,9 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + info.pr_assoc_id = SCTP_FUTURE_ASSOC; + if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); @@ -4251,6 +4281,9 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) ep->strreset_enable = params.assoc_value; @@ -4376,6 +4409,9 @@ static int sctp_setsockopt_scheduler(struct sock *sk, if (asoc) return sctp_sched_set_sched(asoc, params.assoc_value); + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_ss = params.assoc_value; @@ -4541,6 +4577,9 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval, if (asoc) return sctp_assoc_ulpevent_type_set(¶m, asoc); + if (sctp_style(sk, TCP)) + param.se_assoc_id = SCTP_FUTURE_ASSOC; + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || param.se_assoc_id == SCTP_ALL_ASSOC) sctp_ulpevent_type_set(&sp->subscribe, @@ -4808,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* Validate addr_len before calling common connect/connectx routine. */ - af = sctp_get_af_specific(addr->sa_family); + af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : + sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { err = -EINVAL; } else { @@ -9169,7 +9209,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to, { int ancestor_size = sizeof(struct inet_sock) + sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, auto_asconf_list); + offsetof(struct sctp_sock, pd_lobby); if (sk_from->sk_family == PF_INET6) ancestor_size += sizeof(struct ipv6_pinfo); @@ -9253,7 +9293,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. */ - skb_queue_head_init(&newsp->pd_lobby); atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode); if (atomic_read(&sctp_sk(oldsk)->pd_mode)) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 77ef53596d18..6f869ef49b32 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -167,10 +167,9 @@ static int smc_release(struct socket *sock) if (sk->sk_state == SMC_CLOSED) { if (smc->clcsock) { - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); } if (!smc->use_fallback) smc_conn_free(&smc->conn); @@ -446,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_switch_to_fallback(struct smc_sock *smc) +{ + smc->use_fallback = true; + if (smc->sk.sk_socket && smc->sk.sk_socket->file) { + smc->clcsock->file = smc->sk.sk_socket->file; + smc->clcsock->file->private_data = smc->clcsock; + } +} + /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = reason_code; smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -775,10 +783,14 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - if (smc->sk.sk_err) - smc->sk.sk_state_change(&smc->sk); - else - smc->sk.sk_write_space(&smc->sk); + if (!sock_flag(&smc->sk, SOCK_DEAD)) { + if (smc->sk.sk_err) { + smc->sk.sk_state_change(&smc->sk); + } else { /* allow polling before and after fallback decision */ + smc->clcsock->sk->sk_write_space(smc->clcsock->sk); + smc->sk.sk_write_space(&smc->sk); + } + } kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -872,11 +884,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (rc < 0) lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; @@ -927,16 +939,21 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (isk->clcsock) { sock_release(isk->clcsock); isk->clcsock = NULL; } - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ continue; } - if (new_sock) + if (new_sock) { sock_graft(new_sk, new_sock); + if (isk->use_fallback) { + smc_sk(new_sk)->clcsock->file = new_sock->file; + isk->clcsock->file->private_data = isk->clcsock; + } + } return new_sk; } return NULL; @@ -956,6 +973,7 @@ void smc_close_non_accepted(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + sk->sk_prot->unhash(sk); if (smc->clcsock) { struct socket *tcp; @@ -971,7 +989,6 @@ void smc_close_non_accepted(struct sock *sk) smc_conn_free(&smc->conn); } release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ } @@ -1037,13 +1054,13 @@ static void smc_listen_out(struct smc_sock *new_smc) struct smc_sock *lsmc = new_smc->listen_smc; struct sock *newsmcsk = &new_smc->sk; - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); + release_sock(&lsmc->sk); } else { /* no longer listening */ smc_close_non_accepted(newsmcsk); } - release_sock(&lsmc->sk); /* Wake up accept */ lsmc->sk.sk_data_ready(&lsmc->sk); @@ -1087,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, return; } smc_conn_free(&new_smc->conn); - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code) < 0) { @@ -1237,6 +1254,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) + return smc_listen_out_err(new_smc); + if (new_smc->use_fallback) { smc_listen_out_connected(new_smc); return; @@ -1244,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; smc_listen_out_connected(new_smc); return; @@ -1501,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { rc = -EINVAL; @@ -1703,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { if (!smc->use_fallback) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 2ad37e998509..fc06720b53c1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -21,6 +21,22 @@ #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) +/* release the clcsock that is assigned to the smc_sock */ +void smc_clcsock_release(struct smc_sock *smc) +{ + struct socket *tcp; + + if (smc->listen_smc && current_work() != &smc->smc_listen_work) + cancel_work_sync(&smc->smc_listen_work); + mutex_lock(&smc->clcsock_release_lock); + if (smc->clcsock) { + tcp = smc->clcsock; + smc->clcsock = NULL; + sock_release(tcp); + } + mutex_unlock(&smc->clcsock_release_lock); +} + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -321,6 +337,7 @@ static void smc_close_passive_work(struct work_struct *work) close_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); struct smc_cdc_conn_state_flags *rxflags; + bool release_clcsock = false; struct sock *sk = &smc->sk; int old_state; @@ -400,13 +417,13 @@ wakeup: if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); - if (smc->clcsock) { - sock_release(smc->clcsock); - smc->clcsock = NULL; - } + if (smc->clcsock) + release_clcsock = true; } } release_sock(sk); + if (release_clcsock) + smc_clcsock_release(smc); sock_put(sk); /* sock_hold done by schedulers of close_work */ } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 19eb6a211c23..e0e3b5df25d2 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); +void smc_clcsock_release(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 2fff79db1a59..e89e918b88e0 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -289,6 +289,11 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, INIT_LIST_HEAD(&smcd->vlan); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); + if (!smcd->event_wq) { + kfree(smcd->conn); + kfree(smcd); + return NULL; + } return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c..0285c7f9e79b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -603,7 +603,8 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - return smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_remove_by_pnetid(net, NULL); + return 0; } /* SMC_PNETID generic netlink operation definition */ diff --git a/net/socket.c b/net/socket.c index 3c176a12fe48..8255f5bda0aa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ +/** + * sock_alloc_file - Bind a &socket to a &file + * @sock: socket + * @flags: file status flags + * @dname: protocol name + * + * Returns the &file bound with @sock, implicitly storing it + * in sock->file. If dname is %NULL, sets to "". + * On failure the return is a ERR pointer (see linux/err.h). + * This function uses GFP_KERNEL internally. + */ + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct file *file; @@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags) return PTR_ERR(newfile); } +/** + * sock_from_file - Return the &socket bounded to @file. + * @file: file + * @err: pointer to an error code return + * + * On failure returns %NULL and assigns -ENOTSOCK to @err. + */ + struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) @@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = { }; /** - * sock_alloc - allocate a socket + * sock_alloc - allocate a socket * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. + * NULL is returned. This functions uses GFP_KERNEL internally. */ struct socket *sock_alloc(void) @@ -561,7 +581,7 @@ struct socket *sock_alloc(void) EXPORT_SYMBOL(sock_alloc); /** - * sock_release - close a socket + * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release @@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); @@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) } EXPORT_SYMBOL(sock_sendmsg); +/** + * kernel_sendmsg - send a message through @sock (kernel-space) + * @sock: socket + * @msg: message header + * @vec: kernel vec + * @num: vec array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + */ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +/** + * kernel_sendmsg_locked - send a message through @sock (kernel-space) + * @sk: sock + * @msg: message header + * @vec: output s/g array + * @num: output s/g array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + * Caller must hold @sk. + */ + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { @@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) EXPORT_SYMBOL(sock_recvmsg); /** - * kernel_recvmsg - Receive a message from a socket (kernel space) - * @sock: The socket to receive the message from - * @msg: Received message - * @vec: Input s/g array for message data - * @num: Size of input s/g array - * @size: Number of bytes to read - * @flags: Message flags (MSG_DONTWAIT, etc...) + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) * - * On return the msg structure contains the scatter/gather array passed in the - * vec argument. The array is modified so that it consists of the unfilled - * portion of the original array. + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. * - * The returned value is the total number of bytes received, or an error. + * The returned value is the total number of bytes received, or an error. */ + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ + struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; @@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) return err; } +/** + * sock_create_lite - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * Creates a new socket and assigns it to @res, passing through LSM. + * The new socket initialization is not complete, see kernel_accept(). + * Returns 0 or an error. On failure @res is set to %NULL. + * This function internally uses GFP_KERNEL. + */ + int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; @@ -1224,6 +1309,21 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); +/** + * __sock_create - creates a socket + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * @kern: boolean for kernel space sockets + * + * Creates a new socket and assigns it to @res, passing through LSM. + * Returns 0 or an error. On failure @res is set to %NULL. @kern must + * be set to true if the socket resides in kernel space. + * This function internally uses GFP_KERNEL. + */ + int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { @@ -1333,12 +1433,35 @@ out_release: } EXPORT_SYMBOL(__sock_create); +/** + * sock_create - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } EXPORT_SYMBOL(sock_create); +/** + * sock_create_kern - creates a socket (kernel space) + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { return __sock_create(net, family, type, protocol, res, 1); @@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, } #endif +/** + * kernel_bind - bind an address to a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: length of address + * + * Returns 0 or an error. + */ + int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } EXPORT_SYMBOL(kernel_bind); +/** + * kernel_listen - move socket to listening state (kernel space) + * @sock: socket + * @backlog: pending connections queue size + * + * Returns 0 or an error. + */ + int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); +/** + * kernel_accept - accept a connection (kernel space) + * @sock: listening socket + * @newsock: new connected socket + * @flags: flags + * + * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. + * If it fails, @newsock is guaranteed to be %NULL. + * Returns 0 or an error. + */ + int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; @@ -3359,6 +3510,19 @@ done: } EXPORT_SYMBOL(kernel_accept); +/** + * kernel_connect - connect a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: address length + * @flags: flags (O_NONBLOCK, ...) + * + * For datagram sockets, @addr is the addres to which datagrams are sent + * by default, and the only address from which datagrams are received. + * For stream sockets, attempts to connect to @addr. + * Returns 0 or an error code. + */ + int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { @@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); +/** + * kernel_getsockname - get the address which the socket is bound (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is bound. + * Returns 0 or an error code. + */ + int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); +/** + * kernel_peername - get the address which the socket is connected (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is connected. + * Returns 0 or an error code. + */ + int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); +/** + * kernel_getsockopt - get a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Assigns the option length to @optlen. + * Returns 0 or an error. + */ + int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { @@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_getsockopt); +/** + * kernel_setsockopt - set a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Returns 0 or an error. + */ + int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { @@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); +/** + * kernel_sendpage - send a &page through a socket (kernel space) + * @sock: socket + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + */ + int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +/** + * kernel_sendpage_locked - send a &page through the locked sock (kernel space) + * @sk: sock + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + * Caller must hold @sk. + */ + int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); +/** + * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * @sock: socket + * @how: connection part + * + * Returns 0 or an error. + */ + int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); -/* This routine returns the IP overhead imposed by a socket i.e. - * the length of the underlying IP header, depending on whether - * this is an IPv4 or IPv6 socket and the length from IP options turned - * on at the socket. Assumes that the caller has a lock on the socket. +/** + * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket + * @sk: socket + * + * This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. */ + u32 kernel_sock_ip_overhead(struct sock *sk) { struct inet_sock *inet; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index da1a676860ca..fa6c977b4c41 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; } if (unlikely(skb_shinfo(head)->frag_list)) { @@ -550,6 +548,8 @@ EXPORT_SYMBOL_GPL(strp_check_rcv); static int __init strp_mod_init(void) { strp_wq = create_singlethread_workqueue("kstrp"); + if (unlikely(!strp_wq)) + return -ENOMEM; return 0; } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 4f43383971ba..6f2d30d7b766 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -977,7 +977,6 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, } desc->tfm = hmac; - desc->flags = 0; /* Compute intermediate Kseq from session key */ err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); @@ -1045,7 +1044,6 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, } desc->tfm = hmac; - desc->flags = 0; /* Compute intermediate Kcrypt from session key */ for (i = 0; i < kctx->gk5e->keylength; i++) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 56cc85c5bc06..6e5d6d240215 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -438,7 +438,6 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) } desc->tfm = hmac; - desc->flags = 0; err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum); kzfree(desc); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 12bb23b8e0c5..261131dfa1f1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry, struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, @@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 228970e6e52b..8ff11dc98d7f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); - call_reserve(task); } /* @@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; - call_refresh(task); return; } @@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; - call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } /* @@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); - if (rpc_task_need_resched(task)) - return; - call_refreshresult(task); } /* @@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); - call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) { - call_encode(task); + if (req->rq_buffer) return; - } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) { - if (rpc_task_need_resched(task)) - return; - call_encode(task); + if (status == 0) return; - } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; - call_bind(task); + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task) { xprt_end_transmit(task); task->tk_action = call_transmit_status; - call_transmit_status(task); } /* @@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task) if (xprt_bound(xprt)) { task->tk_action = call_connect; - call_connect(task); return; } @@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; - call_connect(task); return; } @@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task) if (xprt_connected(xprt)) { task->tk_action = call_transmit; - call_transmit(task); return; } @@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; - call_transmit(task); return; } rpc_exit(task, status); @@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); } xprt_end_transmit(task); - if (rpc_task_need_resched(task)) - return; - call_transmit_status(task); } /* @@ -2105,11 +2081,8 @@ call_transmit_status(struct rpc_task *task) * test first. */ if (rpc_task_transmitted(task)) { - if (task->tk_status == 0) - xprt_request_wait_receive(task); - if (rpc_task_need_resched(task)) - return; - call_status(task); + task->tk_status = 0; + xprt_request_wait_receive(task); return; } @@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; - call_bc_transmit(task); } /* @@ -2195,6 +2167,9 @@ call_bc_transmit_status(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + if (rpc_task_transmitted(task)) + task->tk_status = 0; + dprint_status(task); switch (task->tk_status) { @@ -2261,7 +2236,6 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; - call_decode(task); return; } @@ -2311,6 +2285,15 @@ out_exit: rpc_exit(task, status); } +static bool +rpc_check_connected(const struct rpc_rqst *req) +{ + /* No allocated request or transport? return true */ + if (!req || !req->rq_xprt) + return true; + return xprt_connected(req->rq_xprt); +} + static void rpc_check_timeout(struct rpc_task *task) { @@ -2322,10 +2305,11 @@ rpc_check_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; - if (RPC_IS_SOFTCONN(task)) { + if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { rpc_exit(task, -ETIMEDOUT); return; } + if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) { printk(KERN_NOTICE "%s: server %s not responding, timed out\n", diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89a63391d4d4..30cfc0efe699 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -90,7 +90,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_qp(ia->ri_id->qp); + ib_drain_rq(ia->ri_id->qp); drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9359539907ba..732d4b57411a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -495,8 +495,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) { struct xdr_buf *buf = &req->rq_private_buf; - size_t want, read; - ssize_t ret; + size_t want, uninitialized_var(read); + ssize_t uninitialized_var(ret); xs_read_header(transport, buf); diff --git a/net/tipc/group.c b/net/tipc/group.c index 06fee142f09f..63f39201e41e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -919,6 +919,9 @@ int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + if (!group) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, grp->type) || nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa4..131aa2f0fd27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..89993afe0fbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; diff --git a/net/tipc/net.c b/net/tipc/net.c index f076edb74338..7ce1e86b024f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr) void tipc_net_stop(struct net *net) { - u32 self = tipc_own_addr(net); - - if (!self) + if (!tipc_own_id(net)) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self); rtnl_lock(); tipc_bearer_stop(net); tipc_node_stop(net); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4ad3586da8f0..340a6e7c43a7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { @@ -397,7 +403,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; @@ -766,7 +777,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..dd3b6dc17662 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -817,10 +817,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -844,7 +844,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3274ef625dba..b542f14ed444 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2349,6 +2349,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return 0; } +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); +} + /** * tipc_connect - establish a connection to another TIPC port * @sock: socket structure @@ -2384,18 +2394,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!tipc_sk_type_connectionless(sk)) res = -EINVAL; goto exit; - } else if (dst->family != AF_TIPC) { - res = -EINVAL; } - if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) + if (!tipc_sockaddr_is_sane(dst)) { res = -EINVAL; - if (res) goto exit; - + } /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { memcpy(&tsk->peer, dest, destlen); goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { + res = -EINVAL; + goto exit; } previous = sk->sk_state; @@ -3255,6 +3265,8 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) peer_port = tsk_peer_port(tsk); nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + if (!nest) + return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) goto msg_full; diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6..9df82a573aa7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,6 +38,8 @@ #include <linux/sysctl.h> +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sk_filter", diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 4a708a4e8583..b45932d78004 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -363,6 +363,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, struct tipc_subscription *sub; if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 135a7ee9db03..14dedb24fa7b 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); @@ -216,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk) } EXPORT_SYMBOL(tls_device_sk_destruct); +void tls_device_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_free_partial_record(sk, tls_ctx); +} + static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) @@ -587,7 +597,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@ -618,25 +628,42 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, data_len); - offset += copy; - buf += copy; + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); + + offset += copy; + buf += copy; + } + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. + */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy); offset += copy; buf += copy; +done_with_frag: + pos += skb_iter->len; } free_buf: @@ -894,7 +921,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_netdev; free_sw_resources: + up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_netdev: @@ -929,8 +958,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) } out: up_read(&device_offload_lock); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); tls_sw_release_resources_rx(sk); } diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 54c3a758f2a7..c3a5fe624b4e 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -194,18 +194,26 @@ static void update_chksum(struct sk_buff *skb, int headln) static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) { + struct sock *sk = skb->sk; + int delta; + skb_copy_header(nskb, skb); skb_put(nskb, skb->len); memcpy(nskb->data, skb->data, headln); - update_chksum(nskb, headln); nskb->destructor = skb->destructor; - nskb->sk = skb->sk; + nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; - refcount_add(nskb->truesize - skb->truesize, - &nskb->sk->sk_wmem_alloc); + + update_chksum(nskb, headln); + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); } /* This function may be called after the user socket is already diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index df921a2904b9..478603f43964 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +{ + struct scatterlist *sg; + + sg = ctx->partially_sent_record; + if (!sg) + return false; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + ctx->partially_sent_record = NULL; + return true; +} + static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); @@ -267,13 +287,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); +#ifdef CONFIG_TLS_DEVICE + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); +#endif } - if (ctx->rx_conf == TLS_SW) { - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + if (ctx->rx_conf == TLS_SW) tls_sw_free_resources_rx(sk); - } #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 425351ac2a9b..29d6af43dd24 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1484,6 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; } + } else { + *zc = false; } rxm->full_len -= padding_length(ctx, tls_ctx, skb); @@ -2050,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ - if (tls_ctx->partially_sent_record) { - struct scatterlist *sg = tls_ctx->partially_sent_record; - - while (1) { - put_page(sg_page(sg)); - sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; - } - - tls_ctx->partially_sent_record = NULL; - + if (tls_free_partial_record(sk, tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); @@ -2089,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 35f06563207d..11eaa5956f00 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -501,7 +501,6 @@ static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, } desc->tfm = tfm_michael; - desc->flags = 0; if (crypto_shash_setkey(tfm_michael, key, 8)) return -1; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c154..47e30a58566c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13650,7 +13650,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13701,7 +13702,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13709,7 +13711,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13738,7 +13741,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -14090,7 +14094,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -14145,7 +14150,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f1bf91eb226..a6fd5ce199da 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1309,6 +1309,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; } +static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1323,6 +1333,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2; freq_range1 = &rule1->freq_range; @@ -1333,6 +1345,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule; + wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1376,6 +1392,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms); + if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = *wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -3739,10 +3778,9 @@ void wiphy_regulatory_register(struct wiphy *wiphy) /* * The last request may have been received before this * registration call. Call the driver notifier if - * initiator is USER and user type is CELL_BASE. + * initiator is USER. */ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) reg_call_notifier(wiphy, lr); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 287518c6caa4..04d888628f29 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -190,10 +190,9 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, /* copy subelement as we need to change its content to * mark an ie after it is processed. */ - sub_copy = kmalloc(subie_len, gfp); + sub_copy = kmemdup(subelement, subie_len, gfp); if (!sub_copy) return 0; - memcpy(sub_copy, subelement, subie_len); pos = &new_ie[0]; diff --git a/net/wireless/util.c b/net/wireless/util.c index e4b8db5e81ec..75899b62bdc9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1220,9 +1220,11 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) else if (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26) result = rates_26[rate->he_gi]; - else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", - rate->bw, rate->he_ru_alloc)) + else { + WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", + rate->bw, rate->he_ru_alloc); return 0; + } /* now scale to the appropriate MCS */ tmp = result; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 77520eacee8f..989e52386c35 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem) { - struct task_struct *task; - struct mm_struct *mm; - xdp_umem_clear_dev(umem); ida_simple_remove(&umem_ida, umem->id); @@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_unpin_pages(umem); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; - - mmput(mm); kfree(umem->pages); umem->pages = NULL; xdp_umem_unaccount_pages(umem); -out: kfree(umem); } @@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (size_chk < 0) return -EINVAL; - umem->pid = get_task_pid(current, PIDTYPE_PID); umem->address = (unsigned long)addr; umem->chunk_mask = ~((u64)chunk_size - 1); umem->size = size; @@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) err = xdp_umem_account_pages(umem); if (err) - goto out; + return err; err = xdp_umem_pin_pages(umem); if (err) @@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) out_account: xdp_umem_unaccount_pages(umem); -out: - put_pid(umem->pid); return err; } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dbb3c1945b5c..85fec98676d3 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) +static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, + unsigned short family) { struct xfrmi_net *xfrmn; - int ifindex; struct xfrm_if *xi; + int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) return NULL; + switch (family) { + case AF_INET6: + ifindex = inet6_sdif(skb); + break; + case AF_INET: + ifindex = inet_sdif(skb); + break; + } + if (!ifindex) + ifindex = skb->dev->ifindex; + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); - ifindex = skb->dev->ifindex; for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { if (ifindex == xi->dev->ifindex && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8d1a898d0ba5..a6b58df7a70f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3313,7 +3313,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb); + xi = ifcb->decode_session(skb, family); if (xi) { if_id = xi->p.if_id; net = xi->net; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1bb971f46fc6..c62f712fdaf7 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -434,7 +434,7 @@ EXPORT_SYMBOL(xfrm_state_free); static void ___xfrm_state_destroy(struct xfrm_state *x) { - tasklet_hrtimer_cancel(&x->mtimer); + hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aead); kfree(x->aalg); @@ -479,8 +479,8 @@ static void xfrm_state_gc_task(struct work_struct *work) static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { - struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); - struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); + struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer); + enum hrtimer_restart ret = HRTIMER_NORESTART; time64_t now = ktime_get_real_seconds(); time64_t next = TIME64_MAX; int warn = 0; @@ -544,7 +544,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) km_state_expired(x, 0, 0); resched: if (next != TIME64_MAX) { - tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + hrtimer_forward_now(&x->mtimer, ktime_set(next, 0)); + ret = HRTIMER_RESTART; } goto out; @@ -561,7 +562,7 @@ expired: out: spin_unlock(&x->lock); - return HRTIMER_NORESTART; + return ret; } static void xfrm_replay_timer_handler(struct timer_list *t); @@ -580,8 +581,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, - CLOCK_BOOTTIME, HRTIMER_MODE_ABS); + hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); + x->mtimer.function = xfrm_timer_handler; timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); x->curlft.add_time = ktime_get_real_seconds(); x->lft.soft_byte_limit = XFRM_INF; @@ -1047,7 +1048,9 @@ found: hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, + ktime_set(net->xfrm.sysctl_acq_expires, 0), + HRTIMER_MODE_REL_SOFT); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -1159,7 +1162,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } - tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); @@ -1266,7 +1269,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, + ktime_set(net->xfrm.sysctl_acq_expires, 0), + HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); @@ -1571,7 +1576,8 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); + hrtimer_start(&x1->mtimer, ktime_set(1, 0), + HRTIMER_MODE_REL_SOFT); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -1610,7 +1616,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT); return -EINVAL; } @@ -2384,7 +2390,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a131f9ff979e..6916931b1de1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; @@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return -EINVAL; } - switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: -#if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: -#endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - } return 0; |