summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_xen.c2
-rw-r--r--net/Makefile2
-rw-r--r--net/appletalk/aarp.c15
-rw-r--r--net/appletalk/ddp.c20
-rw-r--r--net/atm/resources.c10
-rw-r--r--net/bpf/test_run.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/bridge/br_netfilter_ipv6.c2
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/messenger.c8
-rw-r--r--net/ceph/mon_client.c9
-rw-r--r--net/ceph/osdmap.c5
-rw-r--r--net/core/devlink.c5
-rw-r--r--net/core/ethtool.c16
-rw-r--r--net/core/filter.c35
-rw-r--r--net/core/gro_cells.c22
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/skmsg.c1
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/hsr/hsr_device.c18
-rw-r--r--net/hsr/hsr_framereg.c12
-rw-r--r--net/hsr/hsr_framereg.h1
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/route.c13
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c9
-rw-r--r--net/ipv6/fou6.c4
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c6
-rw-r--r--net/ipv6/route.c18
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/mpls/mpls_iptunnel.c12
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/nf_conntrack_sip.c37
-rw-r--r--net/netfilter/nf_nat_masquerade.c35
-rw-r--r--net/netfilter/nf_tables_api.c59
-rw-r--r--net/netfilter/nft_dynset.c13
-rw-r--r--net/netfilter/nft_lookup.c13
-rw-r--r--net/netfilter/nft_objref.c32
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c7
-rw-r--r--net/netlink/genetlink.c3
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/openvswitch/datapath.c12
-rw-r--r--net/openvswitch/flow.h1
-rw-r--r--net/openvswitch/flow_netlink.h1
-rw-r--r--net/openvswitch/flow_table.c51
-rw-r--r--net/openvswitch/flow_table.h3
-rw-r--r--net/packet/af_packet.c7
-rw-r--r--net/rds/ib.h12
-rw-r--r--net/rds/ib_fmr.c8
-rw-r--r--net/rds/ib_frmr.c4
-rw-r--r--net/rds/ib_recv.c8
-rw-r--r--net/rds/ib_send.c15
-rw-r--r--net/rose/rose_subr.c21
-rw-r--r--net/rxrpc/conn_client.c24
-rw-r--r--net/rxrpc/output.c11
-rw-r--r--net/sched/Kconfig3
-rw-r--r--net/sched/act_api.c101
-rw-r--r--net/sched/act_bpf.c25
-rw-r--r--net/sched/act_connmark.c22
-rw-r--r--net/sched/act_csum.c22
-rw-r--r--net/sched/act_gact.c15
-rw-r--r--net/sched/act_ife.c35
-rw-r--r--net/sched/act_ipt.c11
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c15
-rw-r--r--net/sched/act_pedit.c18
-rw-r--r--net/sched/act_police.c13
-rw-r--r--net/sched/act_sample.c21
-rw-r--r--net/sched/act_simple.c54
-rw-r--r--net/sched/act_skbedit.c20
-rw-r--r--net/sched/act_skbmod.c20
-rw-r--r--net/sched/act_tunnel_key.c39
-rw-r--r--net/sched/act_vlan.c22
-rw-r--r--net/sched/cls_api.c51
-rw-r--r--net/sched/cls_flower.c43
-rw-r--r--net/sched/sch_api.c15
-rw-r--r--net/sched/sch_cake.c25
-rw-r--r--net/sctp/auth.c6
-rw-r--r--net/sctp/endpointola.c18
-rw-r--r--net/sctp/socket.c98
-rw-r--r--net/sctp/stream.c127
-rw-r--r--net/sctp/stream_interleave.c2
-rw-r--r--net/smc/smc_rx.c1
-rw-r--r--net/socket.c277
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/Kconfig16
-rw-r--r--net/sunrpc/auth.c136
-rw-r--r--net/sunrpc/auth_gss/Makefile2
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c553
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c29
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c8
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c27
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c15
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.h16
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c15
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h17
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c3
-rw-r--r--net/sunrpc/auth_gss/trace.c11
-rw-r--r--net/sunrpc/auth_null.c56
-rw-r--r--net/sunrpc/auth_unix.c122
-rw-r--r--net/sunrpc/backchannel_rqst.c42
-rw-r--r--net/sunrpc/clnt.c676
-rw-r--r--net/sunrpc/sched.c17
-rw-r--r--net/sunrpc/svc.c19
-rw-r--r--net/sunrpc/svc_xprt.c24
-rw-r--r--net/sunrpc/svcsock.c20
-rw-r--r--net/sunrpc/xdr.c121
-rw-r--r--net/sunrpc/xprt.c25
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c3
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c4
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c22
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c12
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c9
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h12
-rw-r--r--net/sunrpc/xprtsock.c323
-rw-r--r--net/tipc/group.c3
-rw-r--r--net/tipc/net.c5
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c24
-rw-r--r--net/tipc/topsrv.c1
-rw-r--r--net/tls/tls_device.c3
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile2
-rw-r--r--net/unix/af_unix.c63
-rw-r--r--net/unix/garbage.c68
-rw-r--r--net/unix/scm.c151
-rw-r--r--net/unix/scm.h10
-rw-r--r--net/vmw_vsock/virtio_transport_common.c22
-rw-r--r--net/x25/af_x25.c7
-rw-r--r--net/xdp/xdp_umem.c19
-rw-r--r--net/xdp/xsk.c5
-rw-r--r--net/xdp/xsk_diag.c4
-rw-r--r--net/xdp/xsk_queue.h4
148 files changed, 2752 insertions, 1922 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 357214a51f13..b85d51f4b8eb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1061,7 +1061,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
p9_debug(P9_DEBUG_ERROR,
"Please specify a msize of at least 4k\n");
err = -EINVAL;
- goto free_client;
+ goto close_trans;
}
err = p9_client_version(clnt);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index e2fbf3677b9b..29420ebb8f07 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -513,7 +513,7 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's CLOSING state -- fallthrough */
+ /* fall through - Missed the backend's CLOSING state */
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..449fc0b221f8 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX) += unix/
+obj-$(CONFIG_UNIX_SCM) += unix/
obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 49a16cee2aae..420a98bf79b5 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = {
static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
-void __init aarp_proto_init(void)
+int __init aarp_proto_init(void)
{
+ int rc;
+
aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
- if (!aarp_dl)
+ if (!aarp_dl) {
printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+ return -ENOMEM;
+ }
timer_setup(&aarp_timer, aarp_expire_timeout, 0);
aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
add_timer(&aarp_timer);
- register_netdevice_notifier(&aarp_notifier);
+ rc = register_netdevice_notifier(&aarp_notifier);
+ if (rc) {
+ del_timer_sync(&aarp_timer);
+ unregister_snap_client(aarp_dl);
+ }
+ return rc;
}
/* Remove the AARP entries associated with a device. */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 795fbc6c06aa..709d2542f729 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B };
EXPORT_SYMBOL(atrtr_get_dev);
EXPORT_SYMBOL(atalk_find_dev_addr);
-static const char atalk_err_snap[] __initconst =
- KERN_CRIT "Unable to register DDP with SNAP.\n";
-
/* Called by proto.c on kernel start up */
static int __init atalk_init(void)
{
@@ -1921,17 +1918,22 @@ static int __init atalk_init(void)
goto out_proto;
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
- if (!ddp_dl)
- printk(atalk_err_snap);
+ if (!ddp_dl) {
+ pr_crit("Unable to register DDP with SNAP.\n");
+ goto out_sock;
+ }
dev_add_pack(&ltalk_packet_type);
dev_add_pack(&ppptalk_packet_type);
rc = register_netdevice_notifier(&ddp_notifier);
if (rc)
- goto out_sock;
+ goto out_snap;
+
+ rc = aarp_proto_init();
+ if (rc)
+ goto out_dev;
- aarp_proto_init();
rc = atalk_proc_init();
if (rc)
goto out_aarp;
@@ -1945,11 +1947,13 @@ out_proc:
atalk_proc_exit();
out_aarp:
aarp_cleanup_module();
+out_dev:
unregister_netdevice_notifier(&ddp_notifier);
-out_sock:
+out_snap:
dev_remove_pack(&ppptalk_packet_type);
dev_remove_pack(&ltalk_packet_type);
unregister_snap_client(ddp_dl);
+out_sock:
sock_unregister(PF_APPLETALK);
out_proto:
proto_unregister(&ddp_proto);
diff --git a/net/atm/resources.c b/net/atm/resources.c
index bada395ecdb1..889349c6d90d 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -203,13 +203,9 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
int __user *sioc_len;
int __user *iobuf_len;
-#ifndef CONFIG_COMPAT
- compat = 0; /* Just so the compiler _knows_ */
-#endif
-
switch (cmd) {
case ATM_GETNAMES:
- if (compat) {
+ if (IS_ENABLED(CONFIG_COMPAT) && compat) {
#ifdef CONFIG_COMPAT
struct compat_atm_iobuf __user *ciobuf = arg;
compat_uptr_t cbuf;
@@ -253,7 +249,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
break;
}
- if (compat) {
+ if (IS_ENABLED(CONFIG_COMPAT) && compat) {
#ifdef CONFIG_COMPAT
struct compat_atmif_sioc __user *csioc = arg;
compat_uptr_t carg;
@@ -417,7 +413,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
}
/* fall through */
default:
- if (compat) {
+ if (IS_ENABLED(CONFIG_COMPAT) && compat) {
#ifdef CONFIG_COMPAT
if (!dev->ops->compat_ioctl) {
error = -EINVAL;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index da7051d62727..fab142b796ef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -16,7 +16,7 @@
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time)
{
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
int ret = 0;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9d34de68571b..22afa566cbce 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IP);
+ skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 564710f88f93..e88d6641647b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IPV6);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 9cab80207ced..79eac465ec65 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
}
EXPORT_SYMBOL(__ceph_open_session);
-
int ceph_open_session(struct ceph_client *client)
{
int ret;
@@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_open_session);
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+ unsigned long timeout)
+{
+ u64 newest_epoch;
+ int ret;
+
+ ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+ if (ret)
+ return ret;
+
+ if (client->osdc.osdmap->epoch >= newest_epoch)
+ return 0;
+
+ ceph_osdc_maybe_request_map(&client->osdc);
+ return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
+}
+EXPORT_SYMBOL(ceph_wait_for_latest_osdmap);
static int __init init_ceph_lib(void)
{
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 7e71b0df1fbc..3083988ce729 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -840,6 +840,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
struct ceph_bio_iter *it = &cursor->bio_iter;
+ struct page *page = bio_iter_page(it->bio, it->iter);
BUG_ON(bytes > cursor->resid);
BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
@@ -851,7 +852,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
return false; /* no more data */
}
- if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+ if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
+ page == bio_iter_page(it->bio, it->iter)))
return false; /* more bytes to process in this segment */
if (!it->iter.bi_size) {
@@ -899,6 +901,7 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+ struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter);
BUG_ON(bytes > cursor->resid);
BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
@@ -910,7 +913,8 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
return false; /* no more data */
}
- if (!bytes || cursor->bvec_iter.bi_bvec_done)
+ if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
+ page == bvec_iter_page(bvecs, cursor->bvec_iter)))
return false; /* more bytes to process in this segment */
BUG_ON(cursor->last_piece);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 18deb3d889c4..a53e4fbb6319 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
mutex_unlock(&monc->mutex);
ret = wait_generic_request(req);
+ if (!ret)
+ /*
+ * Make sure we have the osdmap that includes the blacklist
+ * entry. This is needed to ensure that the OSDs pick up the
+ * new blacklist before processing any future requests from
+ * this client.
+ */
+ ret = ceph_wait_for_latest_osdmap(monc->client, 0);
+
out:
put_generic_request(req);
return ret;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 98c0ff3d6441..48a31dc9161c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -495,9 +495,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
/ sizeof(struct crush_rule_step))
goto bad;
#endif
- r = c->rules[i] = kmalloc(sizeof(*r) +
- yes*sizeof(struct crush_rule_step),
- GFP_NOFS);
+ r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
+ c->rules[i] = r;
if (r == NULL)
goto badmem;
dout(" rule %d is at %p\n", i, r);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 78e22cea4cc7..da0a29f30885 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3897,6 +3897,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
continue;
}
+ if (!devlink->ops->info_get) {
+ idx++;
+ continue;
+ }
+
mutex_lock(&devlink->lock);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d4918ffddda8..b1eb32419732 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
return ret;
}
-static int ethtool_get_per_queue_coalesce(struct net_device *dev,
- void __user *useraddr,
- struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_get_per_queue_coalesce(struct net_device *dev,
+ void __user *useraddr,
+ struct ethtool_per_queue_op *per_queue_opt)
{
u32 bit;
int ret;
@@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev,
return 0;
}
-static int ethtool_set_per_queue_coalesce(struct net_device *dev,
- void __user *useraddr,
- struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_set_per_queue_coalesce(struct net_device *dev,
+ void __user *useraddr,
+ struct ethtool_per_queue_op *per_queue_opt)
{
u32 bit;
int i, ret = 0;
@@ -2405,7 +2407,7 @@ roll_back:
return ret;
}
-static int ethtool_set_per_queue(struct net_device *dev,
+static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
void __user *useraddr, u32 sub_cmd)
{
struct ethtool_per_queue_op per_queue_opt;
diff --git a/net/core/filter.c b/net/core/filter.c
index 5ceba98069d4..647c63a7b25b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
- sk = sk_to_full_sk(sk);
-
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}
@@ -2804,7 +2802,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- if (!skb_is_gso_tcp(skb))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2845,7 +2843,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- if (!skb_is_gso_tcp(skb))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2970,7 +2968,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- if (!skb_is_gso_tcp(skb))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2999,7 +2997,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- if (!skb_is_gso_tcp(skb))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCKET,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
- sk = sk_to_full_sk(sk);
-
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk;
@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
+BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
+{
+ sk = sk_to_full_sk(sk);
+
+ if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_get_listener_sock_proto = {
+ .func = bpf_get_listener_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+};
+
BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
+ case BPF_FUNC_get_listener_sock:
+ return &bpf_get_listener_sock_proto;
case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto;
#endif
@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
+ case BPF_FUNC_get_listener_sock:
+ return &bpf_get_listener_sock_proto;
#endif
default:
return bpf_base_func_proto(func_id);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index acf45ddbe924..e095fb871d91 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct gro_cell *cell;
+ int res;
- if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
- return netif_rx(skb);
+ rcu_read_lock();
+ if (unlikely(!(dev->flags & IFF_UP)))
+ goto drop;
+
+ if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) {
+ res = netif_rx(skb);
+ goto unlock;
+ }
cell = this_cpu_ptr(gcells->cells);
if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+drop:
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
- return NET_RX_DROP;
+ res = NET_RX_DROP;
+ goto unlock;
}
__skb_queue_tail(&cell->napi_skbs, skb);
if (skb_queue_len(&cell->napi_skbs) == 1)
napi_schedule(&cell->napi);
- return NET_RX_SUCCESS;
+
+ res = NET_RX_SUCCESS;
+
+unlock:
+ rcu_read_unlock();
+ return res;
}
EXPORT_SYMBOL(gro_cells_receive);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index cf2f8897ca19..126d31ff5ee3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
/* push the encap headers and fix pointers */
skb_reset_inner_headers(skb);
+ skb_reset_inner_mac_header(skb); /* mac header is not yet set */
+ skb_set_inner_protocol(skb, skb->protocol);
skb->encapsulation = 1;
skb_push(skb, len);
if (ingress)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4ff661f6f989..f8f94303a1f5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -928,6 +928,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error) {
@@ -937,7 +939,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return error;
}
@@ -1464,6 +1465,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
if (error) {
@@ -1473,7 +1476,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
#endif
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return 0;
}
@@ -1745,16 +1747,20 @@ int netdev_register_kobject(struct net_device *ndev)
error = device_add(dev);
if (error)
- return error;
+ goto error_put_device;
error = register_queue_kobjects(ndev);
- if (error) {
- device_del(dev);
- return error;
- }
+ if (error)
+ goto error_device_del;
pm_runtime_set_memalloc_noio(dev, true);
+ return 0;
+
+error_device_del:
+ device_del(dev);
+error_put_device:
+ put_device(dev);
return error;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ae6f06e45737..cc94d921476c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
/* No sk_callback_lock since already detached. */
+ strp_stop(&psock->parser.strp);
strp_done(&psock->parser.strp);
cancel_work_sync(&psock->work);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d5740bad5b18..57d84e9b7b6f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -436,8 +436,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
- newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b8cd43c9ed5b..a97bf326b231 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev,
&& (old_operstate != IF_OPER_UP)) {
/* Went up */
hsr->announce_count = 0;
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
- add_timer(&hsr->announce_timer);
+ mod_timer(&hsr->announce_timer,
+ jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
}
if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
@@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t)
{
struct hsr_priv *hsr;
struct hsr_port *master;
+ unsigned long interval;
hsr = from_timer(hsr, t, announce_timer);
@@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t)
hsr->protVersion);
hsr->announce_count++;
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+ interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
} else {
send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
hsr->protVersion);
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+ interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
}
if (is_admin_up(master->dev))
- add_timer(&hsr->announce_timer);
+ mod_timer(&hsr->announce_timer, jiffies + interval);
rcu_read_unlock();
}
@@ -486,7 +484,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
if (res)
- return res;
+ goto err_add_port;
res = register_netdevice(hsr_dev);
if (res)
@@ -506,6 +504,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
fail:
hsr_for_each_port(hsr, port)
hsr_del_port(port);
+err_add_port:
+ hsr_del_node(&hsr->self_node_db);
return res;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 286ceb41ac0c..9af16cb68f76 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db,
return 0;
}
+void hsr_del_node(struct list_head *self_node_db)
+{
+ struct hsr_node *node;
+
+ rcu_read_lock();
+ node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+ rcu_read_unlock();
+ if (node) {
+ list_del_rcu(&node->mac_list);
+ kfree(node);
+ }
+}
/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
* seq_out is used to initialize filtering of outgoing duplicate frames
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 370b45998121..531fd3dfcac1 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -16,6 +16,7 @@
struct hsr_node;
+void hsr_del_node(struct list_head *self_node_db);
struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
u16 seq_out);
struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 437070d1ffb1..79e98e21cdd7 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1024,7 +1024,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
int ret;
len = sizeof(struct udphdr) + sizeof(struct guehdr);
- if (!pskb_may_pull(skb, len))
+ if (!pskb_may_pull(skb, transport_offset + len))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -1059,7 +1059,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
optlen = guehdr->hlen << 2;
- if (!pskb_may_pull(skb, len + optlen))
+ if (!pskb_may_pull(skb, transport_offset + len + optlen))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2756fb725bf0..a5d8cad18ead 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -515,9 +515,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- sizeof(struct iphdr) - tunnel_hlen;
else
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- skb_dst_update_pmtu(skb, mtu);
+ if (skb_valid_dst(skb))
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
@@ -530,9 +531,11 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
}
#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt6;
__be32 daddr;
+ rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+ NULL;
daddr = md ? dst : tunnel->parms.iph.daddr;
if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 738ff0a1a048..a5da63e5faa2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1303,6 +1303,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
if (fnhe->fnhe_daddr == daddr) {
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ /* set fnhe_daddr to 0 to ensure it won't bind with
+ * new dsts in rt_bind_exception().
+ */
+ fnhe->fnhe_daddr = 0;
fnhe_flush_routes(fnhe);
kfree_rcu(fnhe, rcu);
break;
@@ -2149,12 +2153,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int our = 0;
int err = -EINVAL;
- if (in_dev)
- our = ip_check_mc_rcu(in_dev, daddr, saddr,
- ip_hdr(skb)->protocol);
+ if (!in_dev)
+ return err;
+ our = ip_check_mc_rcu(in_dev, daddr, saddr,
+ ip_hdr(skb)->protocol);
/* check l3 master if no match yet */
- if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+ if (!our && netif_is_l3_slave(dev)) {
struct in_device *l3_in_dev;
l3_in_dev = __in_dev_get_rcu(skb->dev);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 606f868d9f3f..e531344611a0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -216,7 +216,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
- inet_csk_reqsk_queue_add(sk, req, child);
+ if (!inet_csk_reqsk_queue_add(sk, req, child)) {
+ bh_unlock_sock(child);
+ sock_put(child);
+ child = NULL;
+ reqsk_put(req);
+ }
} else {
reqsk_free(req);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ad07dd71063d..6baa6dc1b13b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -943,6 +943,10 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
ssize_t copied;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+ WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+ return -EINVAL;
+
/* Wait for a connection to finish. One exception is TCP Fast Open
* (passive side) where data is allowed to be sent before a connection
* is fully established.
@@ -1933,6 +1937,11 @@ static int tcp_inq_hint(struct sock *sk)
inq = tp->rcv_nxt - tp->copied_seq;
release_sock(sk);
}
+ /* After receiving a FIN, tell the user-space to continue reading
+ * by returning a non-zero inq.
+ */
+ if (inq == 0 && sock_flag(sk, SOCK_DONE))
+ inq = 1;
return inq;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4eb0c8ca3c60..5def3c48870e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6498,7 +6498,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
/* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+ if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
+ reqsk_fastopen_remove(fastopen_sk, req, false);
+ bh_unlock_sock(fastopen_sk);
+ sock_put(fastopen_sk);
+ reqsk_put(req);
+ goto drop;
+ }
sk->sk_data_ready(sk);
bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 831d844a27ca..277d71239d75 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1734,15 +1734,8 @@ EXPORT_SYMBOL(tcp_add_backlog);
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = (struct tcphdr *)skb->data;
- unsigned int eaten = skb->len;
- int err;
- err = sk_filter_trim_cap(sk, skb, th->doff * 4);
- if (!err) {
- eaten -= skb->len;
- TCP_SKB_CB(skb)->end_seq -= eaten;
- }
- return err;
+ return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
EXPORT_SYMBOL(tcp_filter);
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 867474abe269..ec4e2ed95f36 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -94,7 +94,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int ret;
len = sizeof(struct udphdr) + sizeof(struct guehdr);
- if (!pskb_may_pull(skb, len))
+ if (!pskb_may_pull(skb, transport_offset + len))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -129,7 +129,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
optlen = guehdr->hlen << 2;
- if (!pskb_may_pull(skb, len + optlen))
+ if (!pskb_may_pull(skb, transport_offset + len + optlen))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 1059894a6f4c..4cb83fb69844 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left + 1) * sizeof(struct in6_addr));
psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+ if (!psid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
&srhinfo->psid_addr)))
@@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left - 1) * sizeof(struct in6_addr));
nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+ if (!nsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
&srhinfo->nsid_addr)))
@@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (srhinfo->mt_flags & IP6T_SRH_LSID) {
lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+ if (!lsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
&srhinfo->lsid_addr)))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4ef4bbdb49d4..0302e0eb07af 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1040,14 +1040,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
struct rt6_info *nrt;
if (!fib6_info_hold_safe(rt))
- return NULL;
+ goto fallback;
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
- if (nrt)
- ip6_rt_copy_init(nrt, rt);
- else
+ if (!nrt) {
fib6_info_release(rt);
+ goto fallback;
+ }
+ ip6_rt_copy_init(nrt, rt);
+ return nrt;
+
+fallback:
+ nrt = dev_net(dev)->ipv6.ip6_null_entry;
+ dst_hold(&nrt->dst);
return nrt;
}
@@ -1096,10 +1102,6 @@ restart:
dst_hold(&rt->dst);
} else {
rt = ip6_create_rt_rcu(f6i);
- if (!rt) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
}
rcu_read_unlock();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 09e440e8dfae..07e21a82ce4c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -778,8 +778,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
pbw0 = tunnel->ip6rd.prefixlen >> 5;
pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
- d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
- tunnel->ip6rd.relay_prefixlen;
+ d = tunnel->ip6rd.relay_prefixlen < 32 ?
+ (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+ tunnel->ip6rd.relay_prefixlen : 0;
pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
if (pbi1 > 0)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57ef69a10889..44d431849d39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1110,11 +1110,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = tcp_v6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
+ newnp->rcv_flowinfo = 0;
if (np->repflow)
- newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0ae6899edac0..37a69df17cab 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -674,9 +674,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*lsa);
-
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
@@ -706,6 +703,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
lsa->l2tp_conn_id = 0;
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = inet6_iif(skb);
+ *addr_len = sizeof(*lsa);
}
if (np->rxopt.all)
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index dda8930f20e7..f3a8557494d6 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -140,9 +140,15 @@ static int mpls_xmit(struct sk_buff *skb)
if (rt)
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
skb);
- else if (rt6)
- err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
- skb);
+ else if (rt6) {
+ if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
+ /* 6PE (RFC 4798) */
+ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
+ skb);
+ } else
+ err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
+ skb);
+ }
if (err)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 5d782445d2fc..bad17bba8ba7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -251,6 +251,10 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
}
attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ if (!attr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
rc = ncsi_write_package_info(skb, ndp, package->id);
if (rc) {
nla_nest_cancel(skb, attr);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d43ffb09939b..6548271209a0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1007,6 +1007,7 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
+ depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f067c6b50857..39fcc1ed18f3 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -20,9 +20,9 @@
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
-#include <net/route.h>
-#include <net/ip6_route.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -871,38 +871,33 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
} else if (sip_external_media) {
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(dev);
- struct rtable *rt;
- struct flowi4 fl4 = {};
-#if IS_ENABLED(CONFIG_IPV6)
- struct flowi6 fl6 = {};
-#endif
+ struct flowi fl;
struct dst_entry *dst = NULL;
+ memset(&fl, 0, sizeof(fl));
+
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
- fl4.daddr = daddr->ip;
- rt = ip_route_output_key(net, &fl4);
- if (!IS_ERR(rt))
- dst = &rt->dst;
+ fl.u.ip4.daddr = daddr->ip;
+ nf_ip_route(net, &dst, &fl, false);
break;
-#if IS_ENABLED(CONFIG_IPV6)
case NFPROTO_IPV6:
- fl6.daddr = daddr->in6;
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- dst_release(dst);
- dst = NULL;
- }
+ fl.u.ip6.daddr = daddr->in6;
+ nf_ip6_route(net, &dst, &fl, false);
break;
-#endif
}
/* Don't predict any conntracks when media endpoint is reachable
* through the same interface as the signalling peer.
*/
- if (dst && dst->dev == dev)
- return NF_ACCEPT;
+ if (dst) {
+ bool external_media = (dst->dev == dev);
+
+ dst_release(dst);
+ if (external_media)
+ return NF_ACCEPT;
+ }
}
/* We need to check whether the registration exists before attempting
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 86fa4dcc63c5..d85c4d902e7b 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -11,7 +11,8 @@
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
static DEFINE_MUTEX(masq_mutex);
-static unsigned int masq_refcnt __read_mostly;
+static unsigned int masq_refcnt4 __read_mostly;
+static unsigned int masq_refcnt6 __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -141,8 +142,13 @@ int nf_nat_masquerade_ipv4_register_notifier(void)
int ret = 0;
mutex_lock(&masq_mutex);
+ if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
+ ret = -EOVERFLOW;
+ goto out_unlock;
+ }
+
/* check if the notifier was already set */
- if (++masq_refcnt > 1)
+ if (++masq_refcnt4 > 1)
goto out_unlock;
/* Register for device down reports */
@@ -160,7 +166,7 @@ int nf_nat_masquerade_ipv4_register_notifier(void)
err_unregister:
unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
- masq_refcnt--;
+ masq_refcnt4--;
out_unlock:
mutex_unlock(&masq_mutex);
return ret;
@@ -171,7 +177,7 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (--masq_refcnt > 0)
+ if (--masq_refcnt4 > 0)
goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
@@ -321,25 +327,23 @@ int nf_nat_masquerade_ipv6_register_notifier(void)
int ret = 0;
mutex_lock(&masq_mutex);
- /* check if the notifier is already set */
- if (++masq_refcnt > 1)
+ if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
+ ret = -EOVERFLOW;
goto out_unlock;
+ }
- ret = register_netdevice_notifier(&masq_dev_notifier);
- if (ret)
- goto err_dec;
+ /* check if the notifier is already set */
+ if (++masq_refcnt6 > 1)
+ goto out_unlock;
ret = register_inet6addr_notifier(&masq_inet6_notifier);
if (ret)
- goto err_unregister;
+ goto err_dec;
mutex_unlock(&masq_mutex);
return ret;
-
-err_unregister:
- unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
- masq_refcnt--;
+ masq_refcnt6--;
out_unlock:
mutex_unlock(&masq_mutex);
return ret;
@@ -350,11 +354,10 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (--masq_refcnt > 0)
+ if (--masq_refcnt6 > 0)
goto out_unlock;
unregister_inet6addr_notifier(&masq_inet6_notifier);
- unregister_netdevice_notifier(&masq_dev_notifier);
out_unlock:
mutex_unlock(&masq_mutex);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index faf6bd10a19f..ef7772e976cc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWSET &&
nft_trans_set(trans) == set) {
- nft_trans_set_bound(trans) = true;
+ set->bound = true;
break;
}
}
@@ -2162,9 +2162,11 @@ err1:
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{
+ const struct nft_expr_type *type = expr->ops->type;
+
if (expr->ops->destroy)
expr->ops->destroy(ctx, expr);
- module_put(expr->ops->type->owner);
+ module_put(type->owner);
}
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
@@ -2804,8 +2806,11 @@ err2:
nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
- if (info[i].ops != NULL)
+ if (info[i].ops) {
module_put(info[i].ops->type->owner);
+ if (info[i].ops->type->release_ops)
+ info[i].ops->type->release_ops(info[i].ops);
+ }
}
kvfree(info);
return err;
@@ -3672,6 +3677,9 @@ err1:
static void nft_set_destroy(struct nft_set *set)
{
+ if (WARN_ON(set->use > 0))
+ return;
+
set->ops->destroy(set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
@@ -3712,7 +3720,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
- if (!list_empty(&set->bindings) ||
+ if (set->use ||
(nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
@@ -3742,6 +3750,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
+ if (set->use == UINT_MAX)
+ return -EOVERFLOW;
+
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -3769,6 +3780,7 @@ bind:
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
+ set->use++;
return 0;
}
@@ -3788,6 +3800,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE:
+ set->use--;
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ set->use--;
+ /* fall through */
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+ phase == NFT_TRANS_COMMIT);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
@@ -6536,6 +6567,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
struct nft_chain *chain;
struct nft_table *table;
+ if (list_empty(&net->nft.commit_list)) {
+ mutex_unlock(&net->nft.commit_mutex);
+ return 0;
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -6709,8 +6745,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- if (!nft_trans_set_bound(trans))
- nft_set_destroy(nft_trans_set(trans));
+ nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6783,8 +6818,11 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- if (!nft_trans_set_bound(trans))
- list_del_rcu(&nft_trans_set(trans)->list);
+ if (nft_trans_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
@@ -6792,8 +6830,11 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
te = (struct nft_trans_elem *)trans->data;
-
te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index a8a74a16f9c4..e461007558e8 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx,
{
struct nft_dynset *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
+ .activate = nft_dynset_activate,
.deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 14496da5141d..161c3451a747 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx,
{
struct nft_lookup *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
+ .activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 79ef074c18ca..8dfa798ea683 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -65,21 +65,34 @@ nla_put_failure:
return -1;
}
-static void nft_objref_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_object *obj = nft_objref_priv(expr);
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
obj->use--;
}
+static void nft_objref_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->use++;
+}
+
static struct nft_expr_type nft_objref_type;
static const struct nft_expr_ops nft_objref_ops = {
.type = &nft_objref_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
.eval = nft_objref_eval,
.init = nft_objref_init,
- .destroy = nft_objref_destroy,
+ .activate = nft_objref_activate,
+ .deactivate = nft_objref_deactivate,
.dump = nft_objref_dump,
};
@@ -162,11 +175,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -183,6 +200,7 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
+ .activate = nft_objref_map_activate,
.deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index f8092926f704..a340cd8a751b 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -233,5 +233,5 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fa61208371f8..321a0036fdf5 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net,
else if (d > 0)
parent = parent->rb_right;
else {
- if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
- continue;
- }
if (nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(this)) {
parent = parent->rb_left;
@@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
+ } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
}
nft_rbtree_flush(net, set, rbe);
return rbe;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 25eeb6d2a75a..f0ec068e1d02 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -366,7 +366,7 @@ int genl_register_family(struct genl_family *family)
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_locked;
+ goto errout_free;
}
err = genl_validate_assign_mc_groups(family);
@@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
+errout_free:
kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ae296273ce3d..17dcd0b5eb32 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -726,6 +726,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->service_name = kmemdup(addr->service_name,
llcp_sock->service_name_len,
GFP_KERNEL);
+ if (!llcp_sock->service_name) {
+ ret = -ENOMEM;
+ goto sock_llcp_release;
+ }
nfc_llcp_sock_link(&local->connecting_sockets, sk);
@@ -745,10 +749,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
return ret;
sock_unlink:
- nfc_llcp_put_ssap(local, llcp_sock->ssap);
-
nfc_llcp_sock_unlink(&local->connecting_sockets, sk);
+sock_llcp_release:
+ nfc_llcp_put_ssap(local, llcp_sock->ssap);
+
put_dev:
nfc_put_device(dev);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6679e96ab1dc..9dd158ab51b3 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -448,6 +448,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
0, upcall_info->cmd);
+ if (!upcall) {
+ err = -EINVAL;
+ goto out;
+ }
upcall->dp_ifindex = dp_ifindex;
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
@@ -460,6 +464,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
BUG_ON(err);
@@ -468,6 +476,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->actions_len) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_actions(upcall_info->actions,
upcall_info->actions_len,
user_skb);
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ba01fc4270bd..5b8e5bd7457b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -30,7 +30,6 @@
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
-#include <linux/flex_array.h>
#include <linux/cpumask.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6657606b2b47..66f9553758a5 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -30,7 +30,6 @@
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
-#include <linux/flex_array.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 80ea2a71852e..cfb0098c9a01 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -111,29 +111,6 @@ int ovs_flow_tbl_count(const struct flow_table *table)
return table->count;
}
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
- struct flex_array *buckets;
- int i, err;
-
- buckets = flex_array_alloc(sizeof(struct hlist_head),
- n_buckets, GFP_KERNEL);
- if (!buckets)
- return NULL;
-
- err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
- if (err) {
- flex_array_free(buckets);
- return NULL;
- }
-
- for (i = 0; i < n_buckets; i++)
- INIT_HLIST_HEAD((struct hlist_head *)
- flex_array_get(buckets, i));
-
- return buckets;
-}
-
static void flow_free(struct sw_flow *flow)
{
int cpu;
@@ -168,31 +145,30 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
flow_free(flow);
}
-static void free_buckets(struct flex_array *buckets)
-{
- flex_array_free(buckets);
-}
-
-
static void __table_instance_destroy(struct table_instance *ti)
{
- free_buckets(ti->buckets);
+ kvfree(ti->buckets);
kfree(ti);
}
static struct table_instance *table_instance_alloc(int new_size)
{
struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+ int i;
if (!ti)
return NULL;
- ti->buckets = alloc_buckets(new_size);
-
+ ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
+ GFP_KERNEL);
if (!ti->buckets) {
kfree(ti);
return NULL;
}
+
+ for (i = 0; i < new_size; i++)
+ INIT_HLIST_HEAD(&ti->buckets[i]);
+
ti->n_buckets = new_size;
ti->node_ver = 0;
ti->keep_flows = false;
@@ -249,7 +225,7 @@ static void table_instance_destroy(struct table_instance *ti,
for (i = 0; i < ti->n_buckets; i++) {
struct sw_flow *flow;
- struct hlist_head *head = flex_array_get(ti->buckets, i);
+ struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
int ver = ti->node_ver;
int ufid_ver = ufid_ti->node_ver;
@@ -294,7 +270,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
ver = ti->node_ver;
while (*bucket < ti->n_buckets) {
i = 0;
- head = flex_array_get(ti->buckets, *bucket);
+ head = &ti->buckets[*bucket];
hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
if (i < *last) {
i++;
@@ -313,8 +289,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
hash = jhash_1word(hash, ti->hash_seed);
- return flex_array_get(ti->buckets,
- (hash & (ti->n_buckets - 1)));
+ return &ti->buckets[hash & (ti->n_buckets - 1)];
}
static void table_instance_insert(struct table_instance *ti,
@@ -347,9 +322,7 @@ static void flow_table_copy_flows(struct table_instance *old,
/* Insert in new table. */
for (i = 0; i < old->n_buckets; i++) {
struct sw_flow *flow;
- struct hlist_head *head;
-
- head = flex_array_get(old->buckets, i);
+ struct hlist_head *head = &old->buckets[i];
if (ufid)
hlist_for_each_entry(flow, head,
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 2dd9900f533d..de5ec6cf5174 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -29,7 +29,6 @@
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
-#include <linux/flex_array.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
@@ -37,7 +36,7 @@
#include "flow.h"
struct table_instance {
- struct flex_array *buckets;
+ struct hlist_head *buckets;
unsigned int n_buckets;
struct rcu_head rcu;
int node_ver;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8376bc1c1508..9419c5cf4de5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1852,7 +1852,8 @@ oom:
static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
- if (!skb->protocol && sock->type == SOCK_RAW) {
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
+ sock->type == SOCK_RAW) {
skb_reset_mac_header(skb);
skb->protocol = dev_parse_header_protocol(skb);
}
@@ -3243,7 +3244,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
}
mutex_lock(&net->packet.sklist_lock);
- sk_add_node_rcu(sk, &net->packet.sklist);
+ sk_add_node_tail_rcu(sk, &net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
@@ -4209,7 +4210,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
struct pgv *pg_vec;
int i;
- pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
+ pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!pg_vec))
goto out;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 752f92235a38..67a715b076ca 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -333,10 +333,8 @@ static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
unsigned int i;
for_each_sg(sglist, sg, sg_dma_len, i) {
- ib_dma_sync_single_for_cpu(dev,
- ib_sg_dma_address(dev, sg),
- ib_sg_dma_len(dev, sg),
- direction);
+ ib_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg_dma_len(sg), direction);
}
}
#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
@@ -350,10 +348,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
unsigned int i;
for_each_sg(sglist, sg, sg_dma_len, i) {
- ib_dma_sync_single_for_device(dev,
- ib_sg_dma_address(dev, sg),
- ib_sg_dma_len(dev, sg),
- direction);
+ ib_dma_sync_single_for_device(dev, sg_dma_address(sg),
+ sg_dma_len(sg), direction);
}
}
#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index e0f70c4051b6..31cf37da4510 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -108,8 +108,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+ unsigned int dma_len = sg_dma_len(&scat[i]);
+ u64 dma_addr = sg_dma_address(&scat[i]);
if (dma_addr & ~PAGE_MASK) {
if (i > 0) {
@@ -148,8 +148,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+ unsigned int dma_len = sg_dma_len(&scat[i]);
+ u64 dma_addr = sg_dma_address(&scat[i]);
for (j = 0; j < dma_len; j += PAGE_SIZE)
dma_pages[page_cnt++] =
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 6431a023ac89..688dcd68d4ea 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -181,8 +181,8 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
ret = -EINVAL;
for (i = 0; i < ibmr->sg_dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+ unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
+ u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
frmr->sg_byte_len += dma_len;
if (dma_addr & ~PAGE_MASK) {
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index d395eec98959..70559854837e 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -346,8 +346,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1];
- sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
- sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+ sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+ sge->length = sg_dma_len(&recv->r_frag->f_sg);
ret = 0;
out:
@@ -409,9 +409,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
- (long) ib_sg_dma_address(
- ic->i_cm_id->device,
- &recv->r_frag->f_sg));
+ (long)sg_dma_address(&recv->r_frag->f_sg));
/* XXX when can this fail? */
ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 09c46f2e97fa..18f2341202f8 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -646,16 +646,16 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (i < work_alloc
&& scat != &rm->data.op_sg[rm->data.op_count]) {
len = min(RDS_FRAG_SIZE,
- ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+ sg_dma_len(scat) - rm->data.op_dmaoff);
send->s_wr.num_sge = 2;
- send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+ send->s_sge[1].addr = sg_dma_address(scat);
send->s_sge[1].addr += rm->data.op_dmaoff;
send->s_sge[1].length = len;
bytes_sent += len;
rm->data.op_dmaoff += len;
- if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+ if (rm->data.op_dmaoff == sg_dma_len(scat)) {
scat++;
rm->data.op_dmasg++;
rm->data.op_dmaoff = 0;
@@ -809,8 +809,8 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
}
/* Convert our struct scatterlist to struct ib_sge */
- send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
- send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+ send->s_sge[0].addr = sg_dma_address(op->op_sg);
+ send->s_sge[0].length = sg_dma_len(op->op_sg);
send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
@@ -922,9 +922,8 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
scat != &op->op_sg[op->op_count]; j++) {
- len = ib_sg_dma_len(ic->i_cm_id->device, scat);
- send->s_sge[j].addr =
- ib_sg_dma_address(ic->i_cm_id->device, scat);
+ len = sg_dma_len(scat);
+ send->s_sge[j].addr = sg_dma_address(scat);
send->s_sge[j].length = len;
send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7ca57741b2fb..7849f286bb93 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype)
struct sk_buff *skb;
unsigned char *dptr;
unsigned char lci1, lci2;
- char buffer[100];
- int len, faclen = 0;
+ int maxfaclen = 0;
+ int len, faclen;
+ int reserve;
- len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+ reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1;
+ len = ROSE_MIN_LEN;
switch (frametype) {
case ROSE_CALL_REQUEST:
len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
- faclen = rose_create_facilities(buffer, rose);
- len += faclen;
+ maxfaclen = 256;
break;
case ROSE_CALL_ACCEPTED:
case ROSE_CLEAR_REQUEST:
@@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype)
break;
}
- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+ skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC);
+ if (!skb)
return;
/*
* Space for AX.25 header and PID.
*/
- skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1);
+ skb_reserve(skb, reserve);
- dptr = skb_put(skb, skb_tailroom(skb));
+ dptr = skb_put(skb, len);
lci1 = (rose->lci >> 8) & 0x0F;
lci2 = (rose->lci >> 0) & 0xFF;
@@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype)
dptr += ROSE_ADDR_LEN;
memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
dptr += ROSE_ADDR_LEN;
- memcpy(dptr, buffer, faclen);
+ faclen = rose_create_facilities(dptr, rose);
+ skb_put(skb, faclen);
dptr += faclen;
break;
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index b2adfa825363..83797b3949e2 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -353,7 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
* normally have to take channel_lock but we do this before anyone else
* can see the connection.
*/
- list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+ list_add(&call->chan_wait_link, &candidate->waiting_calls);
if (cp->exclusive) {
call->conn = candidate;
@@ -432,7 +432,7 @@ found_extant_conn:
call->conn = conn;
call->security_ix = conn->security_ix;
call->service_id = conn->service_id;
- list_add(&call->chan_wait_link, &conn->waiting_calls);
+ list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
spin_unlock(&conn->channel_lock);
_leave(" = 0 [extant %d]", conn->debug_id);
return 0;
@@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
ret = rxrpc_wait_for_channel(call, gfp);
if (ret < 0) {
+ trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
rxrpc_disconnect_client_call(call);
goto out;
}
@@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
*/
void rxrpc_disconnect_client_call(struct rxrpc_call *call)
{
- unsigned int channel = call->cid & RXRPC_CHANNELMASK;
struct rxrpc_connection *conn = call->conn;
- struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_channel *chan = NULL;
struct rxrpc_net *rxnet = conn->params.local->rxnet;
+ unsigned int channel = -1;
+ u32 cid;
+ spin_lock(&conn->channel_lock);
+
+ cid = call->cid;
+ if (cid) {
+ channel = cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+ }
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
call->conn = NULL;
- spin_lock(&conn->channel_lock);
-
/* Calls that have never actually been assigned a channel can simply be
* discarded. If the conn didn't get used either, it will follow
* immediately unless someone else grabs it in the meantime.
@@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
goto out;
}
- ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+ if (rcu_access_pointer(chan->call) != call) {
+ spin_unlock(&conn->channel_lock);
+ BUG();
+ }
/* If a client call was exposed to the world, we save the result for
* retransmission.
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 736aa9281100..004c762c2e8d 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -335,7 +335,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
- bool lost = false;
int ret, opt;
_enter(",{%d}", skb->len);
@@ -393,14 +392,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- lost = true;
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
+ whdr.flags, retrans, true);
+ goto done;
}
}
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
- retrans, lost);
- if (lost)
- goto done;
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans,
+ false);
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1b9afdee5ba9..5c02ad97ef23 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -358,8 +358,7 @@ config NET_SCH_PIE
help
Say Y here if you want to use the Proportional Integral controller
Enhanced scheduler packet scheduling algorithm.
- For more information, please see
- http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
+ For more information, please see https://tools.ietf.org/html/rfc8033
To compile this driver as a module, choose M here: the module
will be called sch_pie.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index aecf1bf233c8..5a87e271d35a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,27 +28,10 @@
#include <net/act_api.h>
#include <net/netlink.h>
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
-{
- u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
- if (!tp)
- return -EINVAL;
- a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
- if (!a->goto_chain)
- return -ENOMEM;
- return 0;
-}
-
-static void tcf_action_goto_chain_fini(struct tc_action *a)
-{
- tcf_chain_put_by_act(a->goto_chain);
-}
-
static void tcf_action_goto_chain_exec(const struct tc_action *a,
struct tcf_result *res)
{
- const struct tcf_chain *chain = a->goto_chain;
+ const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
@@ -71,6 +54,51 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
call_rcu(&old->rcu, tcf_free_cookie_rcu);
}
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+ struct tcf_chain **newchain,
+ struct netlink_ext_ack *extack)
+{
+ int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
+ u32 chain_index;
+
+ if (!opcode)
+ ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
+ else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
+ ret = 0;
+ if (ret) {
+ NL_SET_ERR_MSG(extack, "invalid control action");
+ goto end;
+ }
+
+ if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
+ chain_index = action & TC_ACT_EXT_VAL_MASK;
+ if (!tp || !newchain) {
+ ret = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "can't goto NULL proto/chain");
+ goto end;
+ }
+ *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
+ if (!*newchain) {
+ ret = -ENOMEM;
+ NL_SET_ERR_MSG(extack,
+ "can't allocate goto_chain");
+ }
+ }
+end:
+ return ret;
+}
+EXPORT_SYMBOL(tcf_action_check_ctrlact);
+
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+ struct tcf_chain *goto_chain)
+{
+ a->tcfa_action = action;
+ rcu_swap_protected(a->goto_chain, goto_chain, 1);
+ return goto_chain;
+}
+EXPORT_SYMBOL(tcf_action_set_ctrlact);
+
/* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -78,13 +106,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
*/
static void free_tcf(struct tc_action *p)
{
+ struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
+
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->act_cookie, NULL);
- if (p->goto_chain)
- tcf_action_goto_chain_fini(p);
+ if (chain)
+ tcf_chain_put_by_act(chain);
kfree(p);
}
@@ -654,6 +684,10 @@ repeat:
return TC_ACT_OK;
}
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+ if (unlikely(!rcu_access_pointer(a->goto_chain))) {
+ net_warn_ratelimited("can't go to NULL chain!\n");
+ return TC_ACT_SHOT;
+ }
tcf_action_goto_chain_exec(a, res);
}
@@ -800,15 +834,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
-static bool tcf_action_valid(int action)
-{
- int opcode = TC_ACT_EXT_OPCODE(action);
-
- if (!opcode)
- return action <= TC_ACT_VALUE_MAX;
- return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
-}
-
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -890,10 +915,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, extack);
+ rtnl_held, tp, extack);
else
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- extack);
+ tp, extack);
if (err < 0)
goto err_mod;
@@ -907,18 +932,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err != ACT_P_CREATED)
module_put(a_o->owner);
- if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
- err = tcf_action_goto_chain_init(a, tp);
- if (err) {
- tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
- return ERR_PTR(err);
- }
- }
-
- if (!tcf_action_valid(a->tcfa_action)) {
+ if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+ !rcu_access_pointer(a->goto_chain)) {
tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Invalid control action value");
+ NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
return ERR_PTR(-EINVAL);
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index aa5c38d11a30..3841156aa09f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
@@ -278,10 +279,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
int replace, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
@@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return ret;
}
+ ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (ret < 0)
+ goto release_idr;
+
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
is_ebpf = tb[TCA_ACT_BPF_FD];
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
ret = -EINVAL;
- goto out;
+ goto put_chain;
}
memset(&cfg, 0, sizeof(cfg));
@@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
tcf_bpf_init_from_efd(tb, &cfg);
if (ret < 0)
- goto out;
+ goto put_chain;
prog = to_bpf(*act);
@@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (cfg.bpf_num_ops)
prog->bpf_num_ops = cfg.bpf_num_ops;
- prog->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
rcu_assign_pointer(prog->filter, cfg.filter);
spin_unlock_bh(&prog->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (res == ACT_P_CREATED) {
tcf_idr_insert(tn, *act);
} else {
@@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
}
return res;
-out:
- tcf_idr_release(*act, bind);
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+release_idr:
+ tcf_idr_release(*act, bind);
return ret;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 5d24993cccfe..32ae0cd6e31c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -21,6 +21,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
@@ -97,13 +98,15 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
- int ret = 0;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
@@ -128,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
}
ci = to_connmark(*a);
- ci->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->net = net;
ci->zone = parm->zone;
@@ -142,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
/* replacing action and zone */
spin_lock_bh(&ci->tcf_lock);
- ci->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->zone = parm->zone;
spin_unlock_bh(&ci->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
ret = 0;
}
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index c79aca29505e..0c77e7bdf6d5 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -33,6 +33,7 @@
#include <net/sctp/checksum.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
@@ -46,12 +47,13 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_csum *parm;
struct tcf_csum *p;
int ret = 0, err;
@@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p = to_tcf_csum(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->update_flags = parm->update_flags;
spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(p->params, params_new,
lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (params_new)
kfree_rcu(params_new, rcu);
@@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
/**
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 93da0004e9f4..e540e31069d7 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
@@ -57,10 +58,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
struct tcf_gact *gact;
int ret = 0;
@@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
gact = to_gact(*a);
spin_lock_bh(&gact->tcf_lock);
- gact->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
@@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 9b1f2b3990ee..31c6ffb6abe7 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -29,6 +29,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
@@ -469,11 +470,12 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_ife_params *p;
struct tcf_ife_info *ife;
u16 ife_type = ETH_P_IFE;
@@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
@@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
NULL, NULL);
- if (err) {
-metadata_parse_err:
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
-
+ if (err)
+ goto metadata_parse_err;
err = populate_metalist(ife, tb2, exists, rtnl_held);
if (err)
goto metadata_parse_err;
@@ -581,21 +582,20 @@ metadata_parse_err:
* going to bail out
*/
err = use_all_metadata(ife, exists);
- if (err) {
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
+ if (err)
+ goto metadata_parse_err;
}
if (exists)
spin_lock_bh(&ife->tcf_lock);
- ife->tcf_action = parm->action;
/* protected by tcf_lock when modifying existing action */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(ife->params, p, 1);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
@@ -603,6 +603,13 @@ metadata_parse_err:
tcf_idr_insert(tn, *a);
return ret;
+metadata_parse_err:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ kfree(p);
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 98f5b6ea77b4..04a0b5c61194 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind)
+ const struct tc_action_ops *ops, int ovr, int bind,
+ struct tcf_proto *tp)
{
struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -205,20 +206,20 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked,
+ int bind, bool unlocked, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6692fd054617..17cc6bd4c57c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,10 +94,12 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
@@ -157,18 +159,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+
m = to_mirred(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&m->tcfm_list);
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
spin_lock_bh(&m->tcf_lock);
- m->tcf_action = parm->action;
- m->tcfm_eaction = parm->eaction;
if (parm->ifindex) {
dev = dev_get_by_index(net, parm->ifindex);
if (!dev) {
spin_unlock_bh(&m->tcf_lock);
- tcf_idr_release(*a, bind);
- return -ENODEV;
+ err = -ENODEV;
+ goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
rcu_swap_protected(m->tcfm_dev, dev,
@@ -177,7 +184,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ m->tcfm_eaction = parm->eaction;
spin_unlock_bh(&m->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED) {
spin_lock(&mirred_list_lock);
@@ -188,6 +199,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 543eab9193f1..e91bb8eb81ec 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
@@ -38,10 +39,12 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ bool rtnl_held, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
@@ -76,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else {
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
@@ -84,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
p->mask = parm->mask;
p->flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a80373878df7..287793abfaf9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -23,6 +23,7 @@
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/pkt_cls.h>
static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
@@ -138,10 +139,11 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
struct tcf_pedit_key_ex *keys_ex;
struct tc_pedit *parm;
@@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ goto out_release;
+ }
p = to_pedit(*a);
spin_lock_bh(&p->tcf_lock);
@@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!keys) {
spin_unlock_bh(&p->tcf_lock);
ret = -ENOMEM;
- goto out_release;
+ goto put_chain;
}
kfree(p->tcfp_keys);
p->tcfp_keys = keys;
@@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
p->tcfp_flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
kfree(p->tcfp_keys_ex);
p->tcfp_keys_ex = keys_ex;
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
out_free:
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8271a6263824..2b8581f6ab51 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <net/act_api.h>
#include <net/netlink.h>
+#include <net/pkt_cls.h>
struct tcf_police_params {
int tcfp_result;
@@ -83,10 +84,12 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
struct nlattr *tb[TCA_POLICE_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
@@ -128,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
police = to_police(*a);
if (parm->rate.rate) {
@@ -213,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (new->peak_present)
police->tcfp_ptoks = new->tcfp_mtu_ptoks;
spin_unlock_bh(&police->tcfp_lock);
- police->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(police->params,
new,
lockdep_is_held(&police->tcf_lock));
spin_unlock_bh(&police->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (new)
kfree_rcu(new, rcu);
@@ -229,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
tcf_idr_release(*a, bind);
return err;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 203e399e5c85..4060b0955c97 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -22,6 +22,7 @@
#include <linux/tc_act/tc_sample.h>
#include <net/tc_act/tc_sample.h>
#include <net/psample.h>
+#include <net/pkt_cls.h>
#include <linux/if_arp.h>
@@ -37,12 +38,13 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
+ struct tcf_chain *goto_ch = NULL;
struct tc_sample *parm;
u32 psample_group_num;
struct tcf_sample *s;
@@ -79,18 +81,21 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
psample_group = psample_group_get(net, psample_group_num);
if (!psample_group) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
s = to_sample(*a);
spin_lock_bh(&s->tcf_lock);
- s->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
s->psample_group_num = psample_group_num;
RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -100,10 +105,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
}
spin_unlock_bh(&s->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_sample_cleanup(struct tc_action *a)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d54cb608dbaf..23c8ca5615e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -18,6 +18,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -60,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
return 0;
}
-static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
- struct tc_defact *p)
+static int reset_policy(struct tc_action *a, const struct nlattr *defdata,
+ struct tc_defact *p, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
+ struct tcf_chain *goto_ch = NULL;
+ struct tcf_defact *d;
+ int err;
+
+ err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack);
+ if (err < 0)
+ return err;
+ d = to_defact(a);
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = p->action;
+ goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch);
memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
spin_unlock_bh(&d->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+ return 0;
}
static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
@@ -78,10 +91,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
struct tcf_defact *d;
bool exists = false;
@@ -122,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
d = to_defact(*a);
- ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
- if (ret < 0) {
- tcf_idr_release(*a, bind);
- return ret;
- }
- d->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+
+ err = alloc_defdata(d, tb[TCA_DEF_DATA]);
+ if (err < 0)
+ goto put_chain;
+
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- d = to_defact(*a);
-
if (!ovr) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ err = -EEXIST;
+ goto release_idr;
}
- reset_policy(d, tb[TCA_DEF_DATA], parm);
+ err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack);
+ if (err)
+ goto release_idr;
}
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 65879500b688..7e1d261a31d2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -26,6 +26,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/dsfield.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
@@ -96,11 +97,13 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbedit *parm;
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
@@ -186,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->flags = flags;
@@ -208,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->mask = *mask;
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(d->params, params_new,
lockdep_is_held(&d->tcf_lock));
spin_unlock_bh(&d->tcf_lock);
if (params_new)
kfree_rcu(params_new, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7bac1d78e7a3..1d4c324d0a42 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -16,6 +16,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
@@ -82,11 +83,13 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbmod *parm;
struct tcf_skbmod *d;
bool exists = false;
@@ -153,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
d = to_skbmod(*a);
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->flags = lflags;
- d->tcf_action = parm->action;
if (ovr)
spin_lock_bh(&d->tcf_lock);
/* Protected by tcf_lock if overwriting existing action. */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
p_old = rcu_dereference_protected(d->skbmod_p, 1);
if (lflags & SKBMOD_F_DMAC)
@@ -183,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (p_old)
kfree_rcu(p_old, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_skbmod_cleanup(struct tc_action *a)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3beb4717d3b7..d5aaf90a3971 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
@@ -201,26 +202,23 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
{
if (!p)
return;
- if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-#ifdef CONFIG_DST_CACHE
- struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
-
- dst_cache_destroy(&info->dst_cache);
-#endif
+ if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
dst_release(&p->tcft_enc_metadata->dst);
- }
+
kfree_rcu(p, rcu);
}
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
+ struct tcf_chain *goto_ch = NULL;
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
@@ -338,7 +336,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
&metadata->u.tun_info,
opts_len, extack);
if (ret < 0)
- goto release_dst_cache;
+ goto release_tun_meta;
}
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
@@ -354,16 +352,22 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
&act_tunnel_key_ops, bind, true);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
- goto release_dst_cache;
+ goto release_tun_meta;
}
ret = ACT_P_CREATED;
} else if (!ovr) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
- goto release_dst_cache;
+ goto release_tun_meta;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ exists = true;
+ goto release_tun_meta;
+ }
t = to_tunnel_key(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
@@ -371,29 +375,30 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
ret = -ENOMEM;
exists = true;
- goto release_dst_cache;
+ goto put_chain;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
spin_lock_bh(&t->tcf_lock);
- t->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
spin_unlock_bh(&t->tcf_lock);
tunnel_key_release_params(params_new);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
-release_dst_cache:
-#ifdef CONFIG_DST_CACHE
- if (metadata)
- dst_cache_destroy(&metadata->u.tun_info.dst_cache);
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
release_tun_meta:
-#endif
if (metadata)
dst_release(&metadata->dst);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ac0061599225..0f40d0a74423 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -15,6 +15,7 @@
#include <linux/if_vlan.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
@@ -105,10 +106,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_vlan_params *p;
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
v = to_vlan(*a);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->tcfv_action = action;
@@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
p->tcfv_push_proto = push_proto;
spin_lock_bh(&v->tcf_lock);
- v->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
spin_unlock_bh(&v->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_vlan_cleanup(struct tc_action *a)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 478095d50f95..99ae30c177c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
struct tcf_block *block = chain->block;
mutex_destroy(&chain->filter_chain_lock);
- kfree(chain);
+ kfree_rcu(chain, rcu);
if (free_block)
tcf_block_destroy(block);
}
@@ -470,10 +470,9 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
{
struct tcf_block *block = chain->block;
const struct tcf_proto_ops *tmplt_ops;
- bool is_last, free_block = false;
+ bool free_block = false;
unsigned int refcnt;
void *tmplt_priv;
- u32 chain_index;
mutex_lock(&block->lock);
if (explicitly_created) {
@@ -492,23 +491,21 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
* save these to temporary variables.
*/
refcnt = --chain->refcnt;
- is_last = refcnt - chain->action_refcnt == 0;
tmplt_ops = chain->tmplt_ops;
tmplt_priv = chain->tmplt_priv;
- chain_index = chain->index;
-
- if (refcnt == 0)
- free_block = tcf_chain_detach(chain);
- mutex_unlock(&block->lock);
/* The last dropped non-action reference will trigger notification. */
- if (is_last && !by_act) {
- tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
+ if (refcnt - chain->action_refcnt == 0 && !by_act) {
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
block, NULL, 0, 0, false);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
+ if (refcnt == 0)
+ free_block = tcf_chain_detach(chain);
+ mutex_unlock(&block->lock);
+
if (refcnt == 0) {
tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
tcf_chain_destroy(chain, free_block);
@@ -1896,6 +1893,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1909,10 +1907,14 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
@@ -1944,12 +1946,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+
+ if (err > 0)
+ err = 0;
return err;
}
@@ -2691,6 +2696,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
struct tcf_block *block = chain->block;
struct net *net = block->net;
struct sk_buff *skb;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -2704,9 +2710,14 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ flags & NLM_F_ECHO);
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 27300a3e76c7..c04247b403ed 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1348,46 +1348,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- if (!handle) {
- handle = 1;
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- INT_MAX, GFP_KERNEL);
- } else if (!fold) {
- /* user specifies a handle and it doesn't exist */
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- handle, GFP_KERNEL);
- }
- if (err)
- goto errout;
- fnew->handle = handle;
-
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
if (!tc_flags_valid(fnew->flags)) {
err = -EINVAL;
- goto errout_idr;
+ goto errout;
}
}
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
tp->chain->tmplt_priv, extack);
if (err)
- goto errout_idr;
+ goto errout;
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout_idr;
+ goto errout;
+
+ if (!handle) {
+ handle = 1;
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ INT_MAX, GFP_KERNEL);
+ } else if (!fold) {
+ /* user specifies a handle and it doesn't exist */
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ handle, GFP_KERNEL);
+ }
+ if (err)
+ goto errout_mask;
+ fnew->handle = handle;
if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
err = -EEXIST;
- goto errout_mask;
+ goto errout_idr;
}
err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
if (err)
- goto errout_mask;
+ goto errout_idr;
if (!tc_skip_hw(fnew->flags)) {
err = fl_hw_replace_filter(tp, fnew, extack);
@@ -1426,12 +1426,13 @@ errout_mask_ht:
rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
-errout_mask:
- fl_mask_put(head, fnew->mask, false);
-
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
+
+errout_mask:
+ fl_mask_put(head, fnew->mask, false);
+
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 352b46f98440..fb8f138b9776 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1824,6 +1824,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1834,8 +1835,11 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
static int tclass_del_notify(struct net *net,
@@ -1866,8 +1870,11 @@ static int tclass_del_notify(struct net *net,
return err;
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
#ifdef CONFIG_NET_CLS
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1d2a12132abc..acc9b9da985f 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -211,6 +211,9 @@ struct cake_sched_data {
u8 ack_filter;
u8 atm_mode;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
u16 rate_shft;
ktime_t time_next_packet;
@@ -258,8 +261,7 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3),
- CAKE_FLAG_SPLIT_GSO = BIT(4),
- CAKE_FLAG_FWMARK = BIT(5)
+ CAKE_FLAG_SPLIT_GSO = BIT(4)
};
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -1543,7 +1545,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
- u32 tin;
+ u32 tin, mark;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
@@ -1551,14 +1553,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
*/
dscp = cake_handle_diffserv(skb,
q->rate_flags & CAKE_FLAG_WASH);
+ mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
- else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */
- skb->mark &&
- skb->mark <= q->tin_cnt)
- tin = q->tin_order[skb->mark - 1];
+ else if (mark && mark <= q->tin_cnt)
+ tin = q->tin_order[mark - 1];
else if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
@@ -2172,6 +2173,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
@@ -2619,10 +2621,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_FWMARK]) {
- if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
- q->rate_flags |= CAKE_FLAG_FWMARK;
- else
- q->rate_flags &= ~CAKE_FLAG_FWMARK;
+ q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
+ q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
}
if (q->tins) {
@@ -2784,8 +2784,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FWMARK,
- !!(q->rate_flags & CAKE_FLAG_FWMARK)))
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 5b537613946f..39d72e58b8e5 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -471,12 +471,6 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
struct crypto_shash *tfm = NULL;
__u16 id;
- /* If AUTH extension is disabled, we are done */
- if (!ep->auth_enable) {
- ep->auth_hmacs = NULL;
- return 0;
- }
-
/* If the transforms are already allocated, we are done */
if (ep->auth_hmacs)
return 0;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 40c7eb941bc9..0448b68fce74 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -107,6 +107,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
auth_chunks->param_hdr.length =
htons(sizeof(struct sctp_paramhdr) + 2);
}
+
+ /* Allocate and initialize transorms arrays for supported
+ * HMACs.
+ */
+ err = sctp_auth_init_hmacs(ep, gfp);
+ if (err)
+ goto nomem;
}
/* Initialize the base structure. */
@@ -150,15 +157,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
INIT_LIST_HEAD(&ep->endpoint_shared_keys);
null_key = sctp_auth_shkey_create(0, gfp);
if (!null_key)
- goto nomem;
+ goto nomem_shkey;
list_add(&null_key->key_list, &ep->endpoint_shared_keys);
- /* Allocate and initialize transorms arrays for supported HMACs. */
- err = sctp_auth_init_hmacs(ep, gfp);
- if (err)
- goto nomem_hmacs;
-
/* Add the null key to the endpoint shared keys list and
* set the hmcas and chunks pointers.
*/
@@ -169,8 +171,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
return ep;
-nomem_hmacs:
- sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
+nomem_shkey:
+ sctp_auth_destroy_hmacs(ep->auth_hmacs);
nomem:
/* Free all allocations */
kfree(auth_hmacs);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 533207dbeae9..9874e60c9b0d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -102,9 +102,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk);
static int sctp_do_bind(struct sock *, union sctp_addr *, int);
static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
- struct sctp_association *assoc,
- enum sctp_socket_type type);
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+ struct sctp_association *assoc,
+ enum sctp_socket_type type);
static unsigned long sctp_memory_pressure;
static atomic_long_t sctp_memory_allocated;
@@ -999,7 +999,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1007,7 +1007,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
@@ -1018,7 +1018,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
* causes the address buffer to overflow return EINVAL.
*/
if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
addrcnt++;
@@ -1054,7 +1054,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
}
out:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -1329,7 +1329,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1349,7 +1349,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
out_free:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -2920,6 +2920,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.sack_assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.sack_assoc_id == SCTP_FUTURE_ASSOC ||
params.sack_assoc_id == SCTP_ALL_ASSOC) {
if (params.sack_delay) {
@@ -3024,6 +3027,9 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ info.sinfo_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC ||
info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
sp->default_stream = info.sinfo_stream;
@@ -3081,6 +3087,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ info.snd_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.snd_assoc_id == SCTP_FUTURE_ASSOC ||
info.snd_assoc_id == SCTP_ALL_ASSOC) {
sp->default_stream = info.snd_sid;
@@ -3531,6 +3540,9 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->default_rcv_context = params.assoc_value;
@@ -3670,6 +3682,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->max_burst = params.assoc_value;
@@ -3798,6 +3813,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ authkey->sca_assoc_id = SCTP_FUTURE_ASSOC;
+
if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC ||
authkey->sca_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_set_key(ep, asoc, authkey);
@@ -3853,6 +3871,9 @@ static int sctp_setsockopt_active_key(struct sock *sk,
if (asoc)
return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
@@ -3904,6 +3925,9 @@ static int sctp_setsockopt_del_key(struct sock *sk,
if (asoc)
return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
@@ -3954,6 +3978,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
if (asoc)
return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
@@ -4169,6 +4196,9 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ info.pr_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.pr_assoc_id == SCTP_FUTURE_ASSOC ||
info.pr_assoc_id == SCTP_ALL_ASSOC) {
SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
@@ -4251,6 +4281,9 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
ep->strreset_enable = params.assoc_value;
@@ -4376,6 +4409,9 @@ static int sctp_setsockopt_scheduler(struct sock *sk,
if (asoc)
return sctp_sched_set_sched(asoc, params.assoc_value);
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->default_ss = params.assoc_value;
@@ -4541,6 +4577,9 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
if (asoc)
return sctp_assoc_ulpevent_type_set(&param, asoc);
+ if (sctp_style(sk, TCP))
+ param.se_assoc_id = SCTP_FUTURE_ASSOC;
+
if (param.se_assoc_id == SCTP_FUTURE_ASSOC ||
param.se_assoc_id == SCTP_ALL_ASSOC)
sctp_ulpevent_type_set(&sp->subscribe,
@@ -4891,7 +4930,11 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
/* Populate the fields of the newsk from the oldsk and migrate the
* asoc to the newsk.
*/
- sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+ error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+ if (error) {
+ sk_common_release(newsk);
+ newsk = NULL;
+ }
out:
release_sock(sk);
@@ -5639,7 +5682,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
/* Populate the fields of the newsk from the oldsk and migrate the
* asoc to the newsk.
*/
- sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+ err = sctp_sock_migrate(sk, sock->sk, asoc,
+ SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+ if (err) {
+ sock_release(sock);
+ sock = NULL;
+ }
*sockp = sock;
@@ -9160,7 +9208,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
{
int ancestor_size = sizeof(struct inet_sock) +
sizeof(struct sctp_sock) -
- offsetof(struct sctp_sock, auto_asconf_list);
+ offsetof(struct sctp_sock, pd_lobby);
if (sk_from->sk_family == PF_INET6)
ancestor_size += sizeof(struct ipv6_pinfo);
@@ -9171,9 +9219,9 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
/* Populate the fields of the newsk from the oldsk and migrate the assoc
* and its messages to the newsk.
*/
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
- struct sctp_association *assoc,
- enum sctp_socket_type type)
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+ struct sctp_association *assoc,
+ enum sctp_socket_type type)
{
struct sctp_sock *oldsp = sctp_sk(oldsk);
struct sctp_sock *newsp = sctp_sk(newsk);
@@ -9182,6 +9230,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
struct sk_buff *skb, *tmp;
struct sctp_ulpevent *event;
struct sctp_bind_hashbucket *head;
+ int err;
/* Migrate socket buffer sizes and all the socket level options to the
* new socket.
@@ -9210,8 +9259,20 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
/* Copy the bind_addr list from the original endpoint to the new
* endpoint so that we can handle restarts properly
*/
- sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
- &oldsp->ep->base.bind_addr, GFP_KERNEL);
+ err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
+ &oldsp->ep->base.bind_addr, GFP_KERNEL);
+ if (err)
+ return err;
+
+ /* New ep's auth_hmacs should be set if old ep's is set, in case
+ * that net->sctp.auth_enable has been changed to 0 by users and
+ * new ep's auth_hmacs couldn't be set in sctp_endpoint_init().
+ */
+ if (oldsp->ep->auth_hmacs) {
+ err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL);
+ if (err)
+ return err;
+ }
/* Move any messages in the old socket's receive queue that are for the
* peeled off association to the new socket's receive queue.
@@ -9231,7 +9292,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby.
* 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
*/
- skb_queue_head_init(&newsp->pd_lobby);
atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
@@ -9296,6 +9356,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
}
release_sock(newsk);
+
+ return 0;
}
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 2936ed17bf9e..b6bb68adac6e 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -37,66 +37,6 @@
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
-static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count,
- gfp_t gfp)
-{
- struct flex_array *result;
- int err;
-
- result = flex_array_alloc(elem_size, elem_count, gfp);
- if (result) {
- err = flex_array_prealloc(result, 0, elem_count, gfp);
- if (err) {
- flex_array_free(result);
- result = NULL;
- }
- }
-
- return result;
-}
-
-static void fa_free(struct flex_array *fa)
-{
- if (fa)
- flex_array_free(fa);
-}
-
-static void fa_copy(struct flex_array *fa, struct flex_array *from,
- size_t index, size_t count)
-{
- void *elem;
-
- while (count--) {
- elem = flex_array_get(from, index);
- flex_array_put(fa, index, elem, 0);
- index++;
- }
-}
-
-static void fa_zero(struct flex_array *fa, size_t index, size_t count)
-{
- void *elem;
-
- while (count--) {
- elem = flex_array_get(fa, index);
- memset(elem, 0, fa->element_size);
- index++;
- }
-}
-
-static size_t fa_index(struct flex_array *fa, void *elem, size_t count)
-{
- size_t index = 0;
-
- while (count--) {
- if (elem == flex_array_get(fa, index))
- break;
- index++;
- }
-
- return index;
-}
-
/* Migrates chunks from stream queues to new stream queues if needed,
* but not across associations. Also, removes those chunks to streams
* higher than the new max.
@@ -153,53 +93,32 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
gfp_t gfp)
{
- struct flex_array *out;
- size_t elem_size = sizeof(struct sctp_stream_out);
-
- out = fa_alloc(elem_size, outcnt, gfp);
- if (!out)
- return -ENOMEM;
-
- if (stream->out) {
- fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
- if (stream->out_curr) {
- size_t index = fa_index(stream->out, stream->out_curr,
- stream->outcnt);
-
- BUG_ON(index == stream->outcnt);
- stream->out_curr = flex_array_get(out, index);
- }
- fa_free(stream->out);
- }
+ int ret;
- if (outcnt > stream->outcnt)
- fa_zero(out, stream->outcnt, (outcnt - stream->outcnt));
+ if (outcnt <= stream->outcnt)
+ return 0;
- stream->out = out;
+ ret = genradix_prealloc(&stream->out, outcnt, gfp);
+ if (ret)
+ return ret;
+ stream->outcnt = outcnt;
return 0;
}
static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
gfp_t gfp)
{
- struct flex_array *in;
- size_t elem_size = sizeof(struct sctp_stream_in);
-
- in = fa_alloc(elem_size, incnt, gfp);
- if (!in)
- return -ENOMEM;
-
- if (stream->in) {
- fa_copy(in, stream->in, 0, min(incnt, stream->incnt));
- fa_free(stream->in);
- }
+ int ret;
- if (incnt > stream->incnt)
- fa_zero(in, stream->incnt, (incnt - stream->incnt));
+ if (incnt <= stream->incnt)
+ return 0;
- stream->in = in;
+ ret = genradix_prealloc(&stream->in, incnt, gfp);
+ if (ret)
+ return ret;
+ stream->incnt = incnt;
return 0;
}
@@ -226,12 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
if (ret)
goto out;
- stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
- sched->init(stream);
-
in:
sctp_stream_interleave_init(stream);
if (!incnt)
@@ -240,14 +156,11 @@ in:
ret = sctp_stream_alloc_in(stream, incnt, gfp);
if (ret) {
sched->free(stream);
- fa_free(stream->out);
- stream->out = NULL;
+ genradix_free(&stream->out);
stream->outcnt = 0;
goto out;
}
- stream->incnt = incnt;
-
out:
return ret;
}
@@ -272,8 +185,8 @@ void sctp_stream_free(struct sctp_stream *stream)
sched->free(stream);
for (i = 0; i < stream->outcnt; i++)
kfree(SCTP_SO(stream, i)->ext);
- fa_free(stream->out);
- fa_free(stream->in);
+ genradix_free(&stream->out);
+ genradix_free(&stream->in);
}
void sctp_stream_clear(struct sctp_stream *stream)
@@ -304,8 +217,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
sched->sched_all(stream);
- new->out = NULL;
- new->in = NULL;
+ new->out.tree.root = NULL;
+ new->in.tree.root = NULL;
new->outcnt = 0;
new->incnt = 0;
}
@@ -557,8 +470,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
goto out;
}
- stream->outcnt = outcnt;
-
asoc->strreset_outstanding = !!out + !!in;
out:
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index a6bf21579466..102c6fefe38c 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -101,7 +101,7 @@ static void sctp_chunk_assign_mid(struct sctp_chunk *chunk)
static bool sctp_validate_data(struct sctp_chunk *chunk)
{
- const struct sctp_stream *stream;
+ struct sctp_stream *stream;
__u16 sid, ssn;
if (chunk->chunk_hdr->type != SCTP_CID_DATA)
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index bbcf0fe4ae10..413a6abf227e 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -136,7 +136,6 @@ static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
}
static const struct pipe_buf_operations smc_pipe_ops = {
- .can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = smc_rx_pipe_buf_release,
.steal = smc_rx_pipe_buf_nosteal,
diff --git a/net/socket.c b/net/socket.c
index 3c176a12fe48..8255f5bda0aa 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = {
* but we take care of internal coherence yet.
*/
+/**
+ * sock_alloc_file - Bind a &socket to a &file
+ * @sock: socket
+ * @flags: file status flags
+ * @dname: protocol name
+ *
+ * Returns the &file bound with @sock, implicitly storing it
+ * in sock->file. If dname is %NULL, sets to "".
+ * On failure the return is a ERR pointer (see linux/err.h).
+ * This function uses GFP_KERNEL internally.
+ */
+
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct file *file;
@@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags)
return PTR_ERR(newfile);
}
+/**
+ * sock_from_file - Return the &socket bounded to @file.
+ * @file: file
+ * @err: pointer to an error code return
+ *
+ * On failure returns %NULL and assigns -ENOTSOCK to @err.
+ */
+
struct socket *sock_from_file(struct file *file, int *err)
{
if (file->f_op == &socket_file_ops)
@@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = {
};
/**
- * sock_alloc - allocate a socket
+ * sock_alloc - allocate a socket
*
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
- * NULL is returned.
+ * NULL is returned. This functions uses GFP_KERNEL internally.
*/
struct socket *sock_alloc(void)
@@ -561,7 +581,7 @@ struct socket *sock_alloc(void)
EXPORT_SYMBOL(sock_alloc);
/**
- * sock_release - close a socket
+ * sock_release - close a socket
* @sock: socket to close
*
* The socket is released from the protocol stack if it has a release
@@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
}
EXPORT_SYMBOL(__sock_tx_timestamp);
+/**
+ * sock_sendmsg - send a message through @sock
+ * @sock: socket
+ * @msg: message to send
+ *
+ * Sends @msg through @sock, passing through LSM.
+ * Returns the number of bytes sent, or an error code.
+ */
+
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
@@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg)
}
EXPORT_SYMBOL(sock_sendmsg);
+/**
+ * kernel_sendmsg - send a message through @sock (kernel-space)
+ * @sock: socket
+ * @msg: message header
+ * @vec: kernel vec
+ * @num: vec array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ */
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
+/**
+ * kernel_sendmsg_locked - send a message through @sock (kernel-space)
+ * @sk: sock
+ * @msg: message header
+ * @vec: output s/g array
+ * @num: output s/g array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ * Caller must hold @sk.
+ */
+
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+/**
+ * sock_recvmsg - receive a message from @sock
+ * @sock: socket
+ * @msg: message to receive
+ * @flags: message flags
+ *
+ * Receives @msg from @sock, passing through LSM. Returns the total number
+ * of bytes received, or an error.
+ */
+
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
@@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
EXPORT_SYMBOL(sock_recvmsg);
/**
- * kernel_recvmsg - Receive a message from a socket (kernel space)
- * @sock: The socket to receive the message from
- * @msg: Received message
- * @vec: Input s/g array for message data
- * @num: Size of input s/g array
- * @size: Number of bytes to read
- * @flags: Message flags (MSG_DONTWAIT, etc...)
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock: The socket to receive the message from
+ * @msg: Received message
+ * @vec: Input s/g array for message data
+ * @num: Size of input s/g array
+ * @size: Number of bytes to read
+ * @flags: Message flags (MSG_DONTWAIT, etc...)
*
- * On return the msg structure contains the scatter/gather array passed in the
- * vec argument. The array is modified so that it consists of the unfilled
- * portion of the original array.
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
*
- * The returned value is the total number of bytes received, or an error.
+ * The returned value is the total number of bytes received, or an error.
*/
+
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
@@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+
struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
@@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return err;
}
+/**
+ * sock_create_lite - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * The new socket initialization is not complete, see kernel_accept().
+ * Returns 0 or an error. On failure @res is set to %NULL.
+ * This function internally uses GFP_KERNEL.
+ */
+
int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
@@ -1224,6 +1309,21 @@ call_kill:
}
EXPORT_SYMBOL(sock_wake_async);
+/**
+ * __sock_create - creates a socket
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ * @kern: boolean for kernel space sockets
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * Returns 0 or an error. On failure @res is set to %NULL. @kern must
+ * be set to true if the socket resides in kernel space.
+ * This function internally uses GFP_KERNEL.
+ */
+
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
@@ -1333,12 +1433,35 @@ out_release:
}
EXPORT_SYMBOL(__sock_create);
+/**
+ * sock_create - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);
+/**
+ * sock_create_kern - creates a socket (kernel space)
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
return __sock_create(net, family, type, protocol, res, 1);
@@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
}
#endif
+/**
+ * kernel_bind - bind an address to a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: length of address
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
return sock->ops->bind(sock, addr, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
+/**
+ * kernel_listen - move socket to listening state (kernel space)
+ * @sock: socket
+ * @backlog: pending connections queue size
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_listen(struct socket *sock, int backlog)
{
return sock->ops->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
+/**
+ * kernel_accept - accept a connection (kernel space)
+ * @sock: listening socket
+ * @newsock: new connected socket
+ * @flags: flags
+ *
+ * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
+ * If it fails, @newsock is guaranteed to be %NULL.
+ * Returns 0 or an error.
+ */
+
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -3359,6 +3510,19 @@ done:
}
EXPORT_SYMBOL(kernel_accept);
+/**
+ * kernel_connect - connect a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: address length
+ * @flags: flags (O_NONBLOCK, ...)
+ *
+ * For datagram sockets, @addr is the addres to which datagrams are sent
+ * by default, and the only address from which datagrams are received.
+ * For stream sockets, attempts to connect to @addr.
+ * Returns 0 or an error code.
+ */
+
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
@@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
}
EXPORT_SYMBOL(kernel_connect);
+/**
+ * kernel_getsockname - get the address which the socket is bound (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is bound.
+ * Returns 0 or an error code.
+ */
+
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
+/**
+ * kernel_peername - get the address which the socket is connected (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is connected.
+ * Returns 0 or an error code.
+ */
+
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
+/**
+ * kernel_getsockopt - get a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Assigns the option length to @optlen.
+ * Returns 0 or an error.
+ */
+
int kernel_getsockopt(struct socket *sock, int level, int optname,
char *optval, int *optlen)
{
@@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_getsockopt);
+/**
+ * kernel_setsockopt - set a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_setsockopt(struct socket *sock, int level, int optname,
char *optval, unsigned int optlen)
{
@@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_setsockopt);
+/**
+ * kernel_sendpage - send a &page through a socket (kernel space)
+ * @sock: socket
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ */
+
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
@@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage);
+/**
+ * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
+ * @sk: sock
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ * Caller must hold @sk.
+ */
+
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
@@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
+/**
+ * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
+ * @sock: socket
+ * @how: connection part
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
-/* This routine returns the IP overhead imposed by a socket i.e.
- * the length of the underlying IP header, depending on whether
- * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket. Assumes that the caller has a lock on the socket.
+/**
+ * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
+ * @sk: socket
+ *
+ * This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
*/
+
u32 kernel_sock_ip_overhead(struct sock *sk)
{
struct inet_sock *inet;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index da1a676860ca..860dcfb95ee4 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -550,6 +550,8 @@ EXPORT_SYMBOL_GPL(strp_check_rcv);
static int __init strp_mod_init(void)
{
strp_wq = create_singlethread_workqueue("kstrp");
+ if (unlikely(!strp_wq))
+ return -ENOMEM;
return 0;
}
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index ac09ca803296..83f5617bae07 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
+config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+ bool "Secure RPC: Disable insecure Kerberos encryption types"
+ depends on RPCSEC_GSS_KRB5
+ default n
+ help
+ Choose Y here to disable the use of deprecated encryption types
+ with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
+ deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
+ and DES-CBC-MD4. These types were deprecated by RFC 6649 because
+ they were found to be insecure.
+
+ N is the default because many sites have deployed KDCs and
+ keytabs that contain only these deprecated encryption types.
+ Choosing Y prevents the use of known-insecure encryption types
+ but might result in compatibility problems.
+
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
depends on SUNRPC && SYSCTL
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f3023bbc0b7f..e7861026b9e5 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -17,9 +17,7 @@
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
+#include <trace/events/sunrpc.h>
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
@@ -267,8 +265,6 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
}
}
rcu_read_unlock();
-
- dprintk("RPC: %s returns %d\n", __func__, result);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
@@ -636,9 +632,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
struct rpc_cred *ret;
const struct cred *cred = current_cred();
- dprintk("RPC: looking up %s cred\n",
- auth->au_ops->au_name);
-
memset(&acred, 0, sizeof(acred));
acred.cred = cred;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
@@ -670,8 +663,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
};
struct rpc_cred *ret;
- dprintk("RPC: %5u looking up %s cred\n",
- task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
put_cred(acred.cred);
return ret;
@@ -688,8 +679,6 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
if (!acred.principal)
return NULL;
- dprintk("RPC: %5u looking up %s machine cred\n",
- task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
@@ -698,8 +687,6 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
- dprintk("RPC: %5u looking up %s cred\n",
- task->tk_pid, auth->au_ops->au_name);
return rpcauth_lookupcred(auth, lookupflags);
}
@@ -771,75 +758,102 @@ destroy:
}
EXPORT_SYMBOL_GPL(put_rpccred);
-__be32 *
-rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_marshcred - Append RPC credential to end of @xdr
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing initial portion of RPC Call header
+ *
+ * On success, an appropriate verifier is added to @xdr, @xdr is
+ * updated to point past the verifier, and zero is returned.
+ * Otherwise, @xdr is in an undefined state and a negative errno
+ * is returned.
+ */
+int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
- dprintk("RPC: %5u marshaling %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
- return cred->cr_ops->crmarshal(task, p);
+ return ops->crmarshal(task, xdr);
}
-__be32 *
-rpcauth_checkverf(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_wrap_req_encode - XDR encode the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message.
+ * Otherwise, @xdr is in an undefined state.
+ */
+int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;
- dprintk("RPC: %5u validating %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
- return cred->cr_ops->crvalidate(task, p);
+ encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
+ return 0;
}
+EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);
-static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req - XDR encode and wrap the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message,
+ * and zero is returned. Otherwise, @xdr is in an undefined
+ * state and a negative errno is returned.
+ */
+int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_stream xdr;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
- encode(rqstp, &xdr, obj);
+ return ops->crwrap_req(task, xdr);
}
+/**
+ * rpcauth_checkverf - Validate verifier in RPC Reply header
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing RPC Reply header
+ *
+ * On success, @xdr is updated to point past the verifier and
+ * zero is returned. Otherwise, @xdr is in an undefined state
+ * and a negative errno is returned.
+ */
int
-rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
- __be32 *data, void *obj)
+rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
- dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
- task->tk_pid, cred->cr_ops->cr_name, cred);
- if (cred->cr_ops->crwrap_req)
- return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
- /* By default, we encode the arguments normally. */
- rpcauth_wrap_req_encode(encode, rqstp, data, obj);
- return 0;
+ return ops->crvalidate(task, xdr);
}
-static int
-rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
- __be32 *data, void *obj)
+/**
+ * rpcauth_unwrap_resp_decode - Invoke XDR decode function
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_stream xdr;
+ kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
- return decode(rqstp, &xdr, obj);
+ return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp);
}
+EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
+/**
+ * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
- __be32 *data, void *obj)
+rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
- dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
- task->tk_pid, cred->cr_ops->cr_name, cred);
- if (cred->cr_ops->crunwrap_resp)
- return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
- data, obj);
- /* By default, we decode the arguments normally. */
- return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
+ return ops->crunwrap_resp(task, xdr);
}
bool
@@ -865,8 +879,6 @@ rpcauth_refreshcred(struct rpc_task *task)
goto out;
cred = task->tk_rqstp->rq_cred;
}
- dprintk("RPC: %5u refreshing %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
out:
@@ -880,8 +892,6 @@ rpcauth_invalcred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
- dprintk("RPC: %5u invalidating %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
if (cred)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index c374268b008f..4a29f4c5dac4 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o \
- gss_rpc_upcall.o gss_rpc_xdr.o
+ gss_rpc_upcall.o gss_rpc_xdr.o trace.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1531b0219344..3fd56c0c90ae 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/auth_gss/auth_gss.c
*
@@ -8,34 +9,8 @@
*
* Dug Song <dugsong@monkey.org>
* Andy Adamson <andros@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -55,6 +30,8 @@
#include "../netns.h"
+#include <trace/events/rpcgss.h>
+
static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
@@ -260,6 +237,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
}
ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
if (ret < 0) {
+ trace_rpcgss_import_ctx(ret);
p = ERR_PTR(ret);
goto err;
}
@@ -275,12 +253,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
if (IS_ERR(p))
goto err;
done:
- dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
- __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
- ctx->gc_acceptor.data);
- return p;
+ trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+ ctx->gc_acceptor.len, ctx->gc_acceptor.data);
err:
- dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
return p;
}
@@ -354,10 +329,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
if (auth && pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
- dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
}
- dprintk("RPC: %s found nothing\n", __func__);
return NULL;
}
@@ -456,7 +429,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
size_t buflen = sizeof(gss_msg->databuf);
int len;
- len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
+ len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
buflen -= len;
p += len;
@@ -467,7 +440,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
* identity that we are authenticating to.
*/
if (target_name) {
- len = scnprintf(p, buflen, "target=%s ", target_name);
+ len = scnprintf(p, buflen, " target=%s", target_name);
buflen -= len;
p += len;
gss_msg->msg.len += len;
@@ -487,11 +460,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
char *c = strchr(service_name, '@');
if (!c)
- len = scnprintf(p, buflen, "service=%s ",
+ len = scnprintf(p, buflen, " service=%s",
service_name);
else
len = scnprintf(p, buflen,
- "service=%.*s srchost=%s ",
+ " service=%.*s srchost=%s",
(int)(c - service_name),
service_name, c + 1);
buflen -= len;
@@ -500,17 +473,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
}
if (mech->gm_upcall_enctypes) {
- len = scnprintf(p, buflen, "enctypes=%s ",
+ len = scnprintf(p, buflen, " enctypes=%s",
mech->gm_upcall_enctypes);
buflen -= len;
p += len;
gss_msg->msg.len += len;
}
+ trace_rpcgss_upcall_msg(gss_msg->databuf);
len = scnprintf(p, buflen, "\n");
if (len == 0)
goto out_overflow;
gss_msg->msg.len += len;
-
gss_msg->msg.data = gss_msg->databuf;
return 0;
out_overflow:
@@ -603,8 +576,6 @@ gss_refresh_upcall(struct rpc_task *task)
struct rpc_pipe *pipe;
int err = 0;
- dprintk("RPC: %5u %s for uid %u\n",
- task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@@ -612,7 +583,8 @@ gss_refresh_upcall(struct rpc_task *task)
warn_gssd();
task->tk_timeout = 15*HZ;
rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
- return -EAGAIN;
+ err = -EAGAIN;
+ goto out;
}
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
@@ -635,9 +607,8 @@ gss_refresh_upcall(struct rpc_task *task)
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %5u %s for uid %u result %d\n",
- task->tk_pid, __func__,
- from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+ trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+ cred->cr_cred->fsuid), err);
return err;
}
@@ -652,14 +623,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
DEFINE_WAIT(wait);
int err;
- dprintk("RPC: %s for uid %u\n",
- __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
retry:
err = 0;
/* if gssd is down, just skip upcalling altogether */
if (!gssd_running(net)) {
warn_gssd();
- return -EACCES;
+ err = -EACCES;
+ goto out;
}
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -700,8 +670,8 @@ out_intr:
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %s for uid %u result %d\n",
- __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+ trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+ cred->cr_cred->fsuid), err);
return err;
}
@@ -794,7 +764,6 @@ err_put_ctx:
err:
kfree(buf);
out:
- dprintk("RPC: %s returning %zd\n", __func__, err);
return err;
}
@@ -863,8 +832,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
if (msg->errno < 0) {
- dprintk("RPC: %s releasing msg %p\n",
- __func__, gss_msg);
refcount_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
if (msg->errno == -ETIMEDOUT)
@@ -1024,8 +991,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
struct rpc_auth * auth;
int err = -ENOMEM; /* XXX? */
- dprintk("RPC: creating GSS authenticator for client %p\n", clnt);
-
if (!try_module_get(THIS_MODULE))
return ERR_PTR(err);
if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@@ -1041,10 +1006,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
gss_auth->net = get_net(rpc_net_ns(clnt));
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
- if (!gss_auth->mech) {
- dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
+ if (!gss_auth->mech)
goto err_put_net;
- }
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
@@ -1053,6 +1016,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
+ auth->au_verfsize = GSS_VERF_SLACK >> 2;
+ auth->au_ralign = GSS_VERF_SLACK >> 2;
auth->au_flags = 0;
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
@@ -1099,6 +1064,7 @@ err_free:
kfree(gss_auth);
out_dec:
module_put(THIS_MODULE);
+ trace_rpcgss_createauth(flavor, err);
return ERR_PTR(err);
}
@@ -1135,9 +1101,6 @@ gss_destroy(struct rpc_auth *auth)
struct gss_auth *gss_auth = container_of(auth,
struct gss_auth, rpc_auth);
- dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
- auth, auth->au_flavor);
-
if (hash_hashed(&gss_auth->hash)) {
spin_lock(&gss_auth_hash_lock);
hash_del(&gss_auth->hash);
@@ -1245,7 +1208,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
struct gss_cred *new;
/* Make a copy of the cred so that we can reference count it */
- new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+ new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
if (new) {
struct auth_cred acred = {
.cred = gss_cred->gc_base.cr_cred,
@@ -1300,8 +1263,6 @@ gss_send_destroy_context(struct rpc_cred *cred)
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
- dprintk("RPC: %s\n", __func__);
-
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
kfree(ctx->gc_acceptor.data);
@@ -1324,7 +1285,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
static void
gss_free_cred(struct gss_cred *gss_cred)
{
- dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
kfree(gss_cred);
}
@@ -1381,10 +1341,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
struct gss_cred *cred = NULL;
int err = -ENOMEM;
- dprintk("RPC: %s for uid %d, flavor %d\n",
- __func__, from_kuid(&init_user_ns, acred->cred->fsuid),
- auth->au_flavor);
-
if (!(cred = kzalloc(sizeof(*cred), gfp)))
goto out_err;
@@ -1400,7 +1356,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
return &cred->gc_base;
out_err:
- dprintk("RPC: %s failed with error %d\n", __func__, err);
return ERR_PTR(err);
}
@@ -1526,69 +1481,84 @@ out:
}
/*
-* Marshal credentials.
-* Maybe we should keep a cached credential for performance reasons.
-*/
-static __be32 *
-gss_marshal(struct rpc_task *task, __be32 *p)
+ * Marshal credentials.
+ *
+ * The expensive part is computing the verifier. We can't cache a
+ * pre-computed version of the verifier because the seqno, which
+ * is different every time, is included in the MIC.
+ */
+static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *cred_len;
+ __be32 *p, *cred_len;
u32 maj_stat = 0;
struct xdr_netobj mic;
struct kvec iov;
struct xdr_buf verf_buf;
+ int status;
- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ /* Credential */
- *p++ = htonl(RPC_AUTH_GSS);
+ p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
+ ctx->gc_wire_ctx.len);
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_gss;
cred_len = p++;
spin_lock(&ctx->gc_seq_lock);
req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
spin_unlock(&ctx->gc_seq_lock);
if (req->rq_seqno == MAXSEQ)
- goto out_expired;
+ goto expired;
+ trace_rpcgss_seqno(task);
- *p++ = htonl((u32) RPC_GSS_VERSION);
- *p++ = htonl((u32) ctx->gc_proc);
- *p++ = htonl((u32) req->rq_seqno);
- *p++ = htonl((u32) gss_cred->gc_service);
+ *p++ = cpu_to_be32(RPC_GSS_VERSION);
+ *p++ = cpu_to_be32(ctx->gc_proc);
+ *p++ = cpu_to_be32(req->rq_seqno);
+ *p++ = cpu_to_be32(gss_cred->gc_service);
p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
- *cred_len = htonl((p - (cred_len + 1)) << 2);
+ *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
+
+ /* Verifier */
/* We compute the checksum for the verifier over the xdr-encoded bytes
* starting with the xid and ending at the end of the credential: */
- iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
- req->rq_snd_buf.head[0].iov_base);
+ iov.iov_base = req->rq_snd_buf.head[0].iov_base;
iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
xdr_buf_from_iov(&iov, &verf_buf);
- /* set verifier flavor*/
- *p++ = htonl(RPC_AUTH_GSS);
-
+ p = xdr_reserve_space(xdr, sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_gss;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
- if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
- goto out_expired;
- } else if (maj_stat != 0) {
- pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
- task->tk_status = -EIO;
- goto out_put_ctx;
- }
- p = xdr_encode_opaque(p, NULL, mic.len);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+ goto expired;
+ else if (maj_stat != 0)
+ goto bad_mic;
+ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+ goto marshal_failed;
+ status = 0;
+out:
gss_put_ctx(ctx);
- return p;
-out_expired:
+ return status;
+expired:
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- task->tk_status = -EKEYEXPIRED;
-out_put_ctx:
- gss_put_ctx(ctx);
- return NULL;
+ status = -EKEYEXPIRED;
+ goto out;
+marshal_failed:
+ status = -EMSGSIZE;
+ goto out;
+bad_mic:
+ trace_rpcgss_get_mic(task, maj_stat);
+ status = -EIO;
+ goto out;
}
static int gss_renew_cred(struct rpc_task *task)
@@ -1662,116 +1632,105 @@ gss_refresh_null(struct rpc_task *task)
return 0;
}
-static __be32 *
-gss_validate(struct rpc_task *task, __be32 *p)
+static int
+gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *seq = NULL;
+ __be32 *p, *seq = NULL;
struct kvec iov;
struct xdr_buf verf_buf;
struct xdr_netobj mic;
- u32 flav,len;
- u32 maj_stat;
- __be32 *ret = ERR_PTR(-EIO);
+ u32 len, maj_stat;
+ int status;
- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto validate_failed;
+ if (*p++ != rpc_auth_gss)
+ goto validate_failed;
+ len = be32_to_cpup(p);
+ if (len > RPC_MAX_AUTH_SIZE)
+ goto validate_failed;
+ p = xdr_inline_decode(xdr, len);
+ if (!p)
+ goto validate_failed;
- flav = ntohl(*p++);
- if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
- goto out_bad;
- if (flav != RPC_AUTH_GSS)
- goto out_bad;
seq = kmalloc(4, GFP_NOFS);
if (!seq)
- goto out_bad;
- *seq = htonl(task->tk_rqstp->rq_seqno);
+ goto validate_failed;
+ *seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
iov.iov_base = seq;
iov.iov_len = 4;
xdr_buf_from_iov(&iov, &verf_buf);
mic.data = (u8 *)p;
mic.len = len;
-
- ret = ERR_PTR(-EACCES);
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- if (maj_stat) {
- dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
- task->tk_pid, __func__, maj_stat);
- goto out_bad;
- }
+ if (maj_stat)
+ goto bad_mic;
+
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+ status = 0;
+out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
- task->tk_pid, __func__);
- kfree(seq);
- return p + XDR_QUADLEN(len);
-out_bad:
- gss_put_ctx(ctx);
- dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
- PTR_ERR(ret));
kfree(seq);
- return ret;
-}
-
-static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
-{
- struct xdr_stream xdr;
+ return status;
- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
- encode(rqstp, &xdr, obj);
+validate_failed:
+ status = -EIO;
+ goto out;
+bad_mic:
+ trace_rpcgss_verify_mic(task, maj_stat);
+ status = -EACCES;
+ goto out;
}
-static inline int
-gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
+static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
- struct xdr_buf integ_buf;
- __be32 *integ_len = NULL;
+ struct rpc_rqst *rqstp = task->tk_rqstp;
+ struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
struct xdr_netobj mic;
- u32 offset;
- __be32 *q;
- struct kvec *iov;
- u32 maj_stat = 0;
- int status = -EIO;
+ __be32 *p, *integ_len;
+ u32 offset, maj_stat;
+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto wrap_failed;
integ_len = p++;
- offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
- *p++ = htonl(rqstp->rq_seqno);
+ *p = cpu_to_be32(rqstp->rq_seqno);
- gss_wrap_req_encode(encode, rqstp, p, obj);
+ if (rpcauth_wrap_req_encode(task, xdr))
+ goto wrap_failed;
+ offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
if (xdr_buf_subsegment(snd_buf, &integ_buf,
offset, snd_buf->len - offset))
- return status;
- *integ_len = htonl(integ_buf.len);
+ goto wrap_failed;
+ *integ_len = cpu_to_be32(integ_buf.len);
- /* guess whether we're in the head or the tail: */
- if (snd_buf->page_len || snd_buf->tail[0].iov_len)
- iov = snd_buf->tail;
- else
- iov = snd_buf->head;
- p = iov->iov_base + iov->iov_len;
+ p = xdr_reserve_space(xdr, 0);
+ if (!p)
+ goto wrap_failed;
mic.data = (u8 *)(p + 1);
-
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
- status = -EIO; /* XXX? */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- return status;
- q = xdr_encode_opaque(p, NULL, mic.len);
-
- offset = (u8 *)q - (u8 *)p;
- iov->iov_len += offset;
- snd_buf->len += offset;
+ goto bad_mic;
+ /* Check that the trailing MIC fit in the buffer, after the fact */
+ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+ goto wrap_failed;
return 0;
+wrap_failed:
+ return -EMSGSIZE;
+bad_mic:
+ trace_rpcgss_get_mic(task, maj_stat);
+ return -EIO;
}
static void
@@ -1822,61 +1781,62 @@ out:
return -EAGAIN;
}
-static inline int
-gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
+static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ struct rpc_task *task, struct xdr_stream *xdr)
{
+ struct rpc_rqst *rqstp = task->tk_rqstp;
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
- u32 offset;
- u32 maj_stat;
+ u32 pad, offset, maj_stat;
int status;
- __be32 *opaque_len;
+ __be32 *p, *opaque_len;
struct page **inpages;
int first;
- int pad;
struct kvec *iov;
- char *tmp;
+ status = -EIO;
+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto wrap_failed;
opaque_len = p++;
- offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
- *p++ = htonl(rqstp->rq_seqno);
+ *p = cpu_to_be32(rqstp->rq_seqno);
- gss_wrap_req_encode(encode, rqstp, p, obj);
+ if (rpcauth_wrap_req_encode(task, xdr))
+ goto wrap_failed;
status = alloc_enc_pages(rqstp);
- if (status)
- return status;
+ if (unlikely(status))
+ goto wrap_failed;
first = snd_buf->page_base >> PAGE_SHIFT;
inpages = snd_buf->pages + first;
snd_buf->pages = rqstp->rq_enc_pages;
snd_buf->page_base -= first << PAGE_SHIFT;
/*
- * Give the tail its own page, in case we need extra space in the
- * head when wrapping:
+ * Move the tail into its own page, in case gss_wrap needs
+ * more space in the head when wrapping.
*
- * call_allocate() allocates twice the slack space required
- * by the authentication flavor to rq_callsize.
- * For GSS, slack is GSS_CRED_SLACK.
+ * Still... Why can't gss_wrap just slide the tail down?
*/
if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+ char *tmp;
+
tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
snd_buf->tail[0].iov_base = tmp;
}
+ offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
- BUG_ON(snd_buf->len > snd_buf->buflen);
- status = -EIO;
+ if (unlikely(snd_buf->len > snd_buf->buflen))
+ goto wrap_failed;
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- return status;
+ goto bad_wrap;
- *opaque_len = htonl(snd_buf->len - offset);
- /* guess whether we're in the head or the tail: */
+ *opaque_len = cpu_to_be32(snd_buf->len - offset);
+ /* guess whether the pad goes into the head or the tail: */
if (snd_buf->page_len || snd_buf->tail[0].iov_len)
iov = snd_buf->tail;
else
@@ -1888,118 +1848,154 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
snd_buf->len += pad;
return 0;
+wrap_failed:
+ return status;
+bad_wrap:
+ trace_rpcgss_wrap(task, maj_stat);
+ return -EIO;
}
-static int
-gss_wrap_req(struct rpc_task *task,
- kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
+static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- int status = -EIO;
+ int status;
- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
- gss_wrap_req_encode(encode, rqstp, p, obj);
- status = 0;
+ status = rpcauth_wrap_req_encode(task, xdr);
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
- gss_wrap_req_encode(encode, rqstp, p, obj);
- status = 0;
+ status = rpcauth_wrap_req_encode(task, xdr);
break;
case RPC_GSS_SVC_INTEGRITY:
- status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
+ status = gss_wrap_req_integ(cred, ctx, task, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
- status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
+ status = gss_wrap_req_priv(cred, ctx, task, xdr);
break;
+ default:
+ status = -EIO;
}
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
return status;
}
-static inline int
-gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, __be32 **p)
+static int
+gss_unwrap_resp_auth(struct rpc_cred *cred)
{
- struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
- struct xdr_buf integ_buf;
+ struct rpc_auth *auth = cred->cr_auth;
+
+ auth->au_rslack = auth->au_verfsize;
+ auth->au_ralign = auth->au_verfsize;
+ return 0;
+}
+
+static int
+gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
+ struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr)
+{
+ struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
+ u32 data_offset, mic_offset, integ_len, maj_stat;
+ struct rpc_auth *auth = cred->cr_auth;
struct xdr_netobj mic;
- u32 data_offset, mic_offset;
- u32 integ_len;
- u32 maj_stat;
- int status = -EIO;
+ __be32 *p;
- integ_len = ntohl(*(*p)++);
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (unlikely(!p))
+ goto unwrap_failed;
+ integ_len = be32_to_cpup(p++);
if (integ_len & 3)
- return status;
- data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+ goto unwrap_failed;
+ data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len)
- return status;
- if (ntohl(*(*p)++) != rqstp->rq_seqno)
- return status;
-
- if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
- mic_offset - data_offset))
- return status;
+ goto unwrap_failed;
+ if (be32_to_cpup(p) != rqstp->rq_seqno)
+ goto bad_seqno;
+ if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+ goto unwrap_failed;
if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
- return status;
-
+ goto unwrap_failed;
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
- return status;
+ goto bad_mic;
+
+ auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
+ auth->au_ralign = auth->au_verfsize + 2;
return 0;
+unwrap_failed:
+ trace_rpcgss_unwrap_failed(task);
+ return -EIO;
+bad_seqno:
+ trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
+ return -EIO;
+bad_mic:
+ trace_rpcgss_verify_mic(task, maj_stat);
+ return -EIO;
}
-static inline int
-gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, __be32 **p)
-{
- struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
- u32 offset;
- u32 opaque_len;
- u32 maj_stat;
- int status = -EIO;
-
- opaque_len = ntohl(*(*p)++);
- offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+static int
+gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
+ struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr)
+{
+ struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
+ struct kvec *head = rqstp->rq_rcv_buf.head;
+ struct rpc_auth *auth = cred->cr_auth;
+ unsigned int savedlen = rcv_buf->len;
+ u32 offset, opaque_len, maj_stat;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (unlikely(!p))
+ goto unwrap_failed;
+ opaque_len = be32_to_cpup(p++);
+ offset = (u8 *)(p) - (u8 *)head->iov_base;
if (offset + opaque_len > rcv_buf->len)
- return status;
- /* remove padding: */
+ goto unwrap_failed;
rcv_buf->len = offset + opaque_len;
maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
- return status;
- if (ntohl(*(*p)++) != rqstp->rq_seqno)
- return status;
+ goto bad_unwrap;
+ /* gss_unwrap decrypted the sequence number */
+ if (be32_to_cpup(p++) != rqstp->rq_seqno)
+ goto bad_seqno;
- return 0;
-}
-
-static int
-gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
-{
- struct xdr_stream xdr;
+ /* gss_unwrap redacts the opaque blob from the head iovec.
+ * rcv_buf has changed, thus the stream needs to be reset.
+ */
+ xdr_init_decode(xdr, rcv_buf, p, rqstp);
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- return decode(rqstp, &xdr, obj);
+ auth->au_rslack = auth->au_verfsize + 2 +
+ XDR_QUADLEN(savedlen - rcv_buf->len);
+ auth->au_ralign = auth->au_verfsize + 2 +
+ XDR_QUADLEN(savedlen - rcv_buf->len);
+ return 0;
+unwrap_failed:
+ trace_rpcgss_unwrap_failed(task);
+ return -EIO;
+bad_seqno:
+ trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
+ return -EIO;
+bad_unwrap:
+ trace_rpcgss_unwrap(task, maj_stat);
+ return -EIO;
}
static bool
@@ -2014,14 +2010,14 @@ gss_xmit_need_reencode(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- u32 win, seq_xmit;
+ u32 win, seq_xmit = 0;
bool ret = true;
if (!ctx)
- return true;
+ goto out;
if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
- goto out;
+ goto out_ctx;
seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
@@ -2030,56 +2026,51 @@ gss_xmit_need_reencode(struct rpc_task *task)
seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
if (seq_xmit == tmp) {
ret = false;
- goto out;
+ goto out_ctx;
}
}
win = ctx->gc_win;
if (win > 0)
ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
-out:
+
+out_ctx:
gss_put_ctx(ctx);
+out:
+ trace_rpcgss_need_reencode(task, seq_xmit, ret);
return ret;
}
static int
-gss_unwrap_resp(struct rpc_task *task,
- kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
+gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ struct rpc_rqst *rqstp = task->tk_rqstp;
+ struct rpc_cred *cred = rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *savedp = p;
- struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
- int savedlen = head->iov_len;
- int status = -EIO;
+ int status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA)
goto out_decode;
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
+ status = gss_unwrap_resp_auth(cred);
break;
case RPC_GSS_SVC_INTEGRITY:
- status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
- if (status)
- goto out;
+ status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
- status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
- if (status)
- goto out;
+ status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
break;
}
- /* take into account extra slack for integrity and privacy cases: */
- cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
- + (savedlen - head->iov_len);
+ if (status)
+ goto out;
+
out_decode:
- status = gss_unwrap_req_decode(decode, rqstp, p, obj);
+ status = rpcauth_unwrap_resp_decode(task, xdr);
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s returning %d\n",
- task->tk_pid, __func__, status);
return status;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index eab71fc7af3e..56cc85c5bc06 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_krb5_mech.c
*
@@ -6,32 +7,6 @@
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <crypto/hash.h>
@@ -53,6 +28,7 @@
static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
@@ -74,6 +50,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.cksumlength = 8,
.keyed_cksum = 0,
},
+#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
/*
* RC4-HMAC
*/
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 5cdde6cb703a..14a0aff0cd84 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
*/
movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
- BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
- buf->head[0].iov_len);
+ if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+ buf->head[0].iov_len)
+ return GSS_S_FAILURE;
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
- xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+
return GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 379318dff534..82060099a429 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_mech_switch.c
*
@@ -5,32 +6,6 @@
* All rights reserved.
*
* J. Bruce Fields <bfields@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 73dcda060335..0349f455a862 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* linux/net/sunrpc/gss_rpc_upcall.c
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
index 1e542aded90a..31e96344167e 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.h
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* linux/net/sunrpc/gss_rpc_upcall.h
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _GSS_RPC_UPCALL_H
@@ -45,4 +32,5 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data);
void init_gssp_clnt(struct sunrpc_net *);
int set_gssp_clnt(struct net *);
void clear_gssp_clnt(struct sunrpc_net *);
+
#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 006062ad5f58..2ff7b7083eba 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/sunrpc/svcauth.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 146c31032917..3f17411b7e65 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _LINUX_GSS_RPC_XDR_H
@@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
#define GSSX_ARG_wrap_size_limit_sz 0
#define GSSX_RES_wrap_size_limit_sz 0
-
-
#endif /* _LINUX_GSS_RPC_XDR_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 152790ed309c..0c5d7896d6dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Neil Brown <neilb@cse.unsw.edu.au>
* J. Bruce Fields <bfields@umich.edu>
@@ -896,7 +897,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
if (svc_getnl(&buf->head[0]) != seq)
goto out;
/* trim off the mic and padding at the end before returning */
- xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+ buf->len -= 4 + round_up_to_quad(mic.len);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
new file mode 100644
index 000000000000..5576f1e66de9
--- /dev/null
+++ b/net/sunrpc/auth_gss/trace.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, 2019 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_err.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcgss.h>
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index d0ceac57c06e..41a633a4049e 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
/*
* Marshal credential.
*/
-static __be32 *
-nul_marshal(struct rpc_task *task, __be32 *p)
+static int
+nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = 0;
- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = 0;
-
- return p;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+ if (!p)
+ return -EMSGSIZE;
+ /* Credential */
+ *p++ = rpc_auth_null;
+ *p++ = xdr_zero;
+ /* Verifier */
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
+ return 0;
}
/*
@@ -80,25 +86,19 @@ nul_refresh(struct rpc_task *task)
return 0;
}
-static __be32 *
-nul_validate(struct rpc_task *task, __be32 *p)
+static int
+nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
- rpc_authflavor_t flavor;
- u32 size;
-
- flavor = ntohl(*p++);
- if (flavor != RPC_AUTH_NULL) {
- printk("RPC: bad verf flavor: %u\n", flavor);
- return ERR_PTR(-EIO);
- }
-
- size = ntohl(*p++);
- if (size != 0) {
- printk("RPC: bad verf size: %u\n", size);
- return ERR_PTR(-EIO);
- }
-
- return p;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ return -EIO;
+ if (*p++ != rpc_auth_null)
+ return -EIO;
+ if (*p != xdr_zero)
+ return -EIO;
+ return 0;
}
const struct rpc_authops authnull_ops = {
@@ -114,6 +114,8 @@ static
struct rpc_auth null_auth = {
.au_cslack = NUL_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
+ .au_ralign = NUL_REPLYSLACK,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = REFCOUNT_INIT(1),
@@ -125,8 +127,10 @@ const struct rpc_credops null_credops = {
.crdestroy = nul_destroy_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
};
static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 387f6b3ffbea..d4018e5a24c5 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -28,8 +28,6 @@ static mempool_t *unix_pool;
static struct rpc_auth *
unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- dprintk("RPC: creating UNIX authenticator for client %p\n",
- clnt);
refcount_inc(&unix_auth.au_count);
return &unix_auth;
}
@@ -37,7 +35,6 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
static void
unx_destroy(struct rpc_auth *auth)
{
- dprintk("RPC: destroying UNIX authenticator %p\n", auth);
}
/*
@@ -48,10 +45,6 @@ unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
- dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
- from_kuid(&init_user_ns, acred->cred->fsuid),
- from_kgid(&init_user_ns, acred->cred->fsgid));
-
rpcauth_init_cred(ret, acred, auth, &unix_credops);
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
return ret;
@@ -61,7 +54,7 @@ static void
unx_free_cred_callback(struct rcu_head *head)
{
struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
- dprintk("RPC: unx_free_cred %p\n", rpc_cred);
+
put_cred(rpc_cred->cr_cred);
mempool_free(rpc_cred, unix_pool);
}
@@ -87,7 +80,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
return 0;
- if (acred->cred && acred->cred->group_info != NULL)
+ if (acred->cred->group_info != NULL)
groups = acred->cred->group_info->ngroups;
if (groups > UNX_NGROUPS)
groups = UNX_NGROUPS;
@@ -106,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
-static __be32 *
-unx_marshal(struct rpc_task *task, __be32 *p)
+static int
+unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
- __be32 *base, *hold;
+ __be32 *p, *cred_len, *gidarr_len;
int i;
struct group_info *gi = cred->cr_cred->group_info;
- *p++ = htonl(RPC_AUTH_UNIX);
- base = p++;
- *p++ = htonl(jiffies/HZ);
-
- /*
- * Copy the UTS nodename captured when the client was created.
- */
- p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
-
- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
- hold = p++;
+ /* Credential */
+
+ p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_unix;
+ cred_len = p++;
+ *p++ = xdr_zero; /* stamp */
+ if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
+ clnt->cl_nodelen) < 0)
+ goto marshal_failed;
+ p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+
+ gidarr_len = p++;
if (gi)
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
- *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
- *hold = htonl(p - hold - 1); /* gid array length */
- *base = htonl((p - base - 1) << 2); /* cred length */
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns,
+ gi->gid[i]));
+ *gidarr_len = cpu_to_be32(p - gidarr_len - 1);
+ *cred_len = cpu_to_be32((p - cred_len - 1) << 2);
+ p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
+ if (!p)
+ goto marshal_failed;
+
+ /* Verifier */
+
+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = htonl(0);
+ return 0;
- return p;
+marshal_failed:
+ return -EMSGSIZE;
}
/*
@@ -149,29 +160,35 @@ unx_refresh(struct rpc_task *task)
return 0;
}
-static __be32 *
-unx_validate(struct rpc_task *task, __be32 *p)
+static int
+unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
- rpc_authflavor_t flavor;
- u32 size;
-
- flavor = ntohl(*p++);
- if (flavor != RPC_AUTH_NULL &&
- flavor != RPC_AUTH_UNIX &&
- flavor != RPC_AUTH_SHORT) {
- printk("RPC: bad verf flavor: %u\n", flavor);
- return ERR_PTR(-EIO);
- }
-
- size = ntohl(*p++);
- if (size > RPC_MAX_AUTH_SIZE) {
- printk("RPC: giant verf size: %u\n", size);
- return ERR_PTR(-EIO);
+ struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
+ __be32 *p;
+ u32 size;
+
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ return -EIO;
+ switch (*p++) {
+ case rpc_auth_null:
+ case rpc_auth_unix:
+ case rpc_auth_short:
+ break;
+ default:
+ return -EIO;
}
- task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
- p += (size >> 2);
-
- return p;
+ size = be32_to_cpup(p);
+ if (size > RPC_MAX_AUTH_SIZE)
+ return -EIO;
+ p = xdr_inline_decode(xdr, size);
+ if (!p)
+ return -EIO;
+
+ auth->au_verfsize = XDR_QUADLEN(size) + 2;
+ auth->au_rslack = XDR_QUADLEN(size) + 2;
+ auth->au_ralign = XDR_QUADLEN(size) + 2;
+ return 0;
}
int __init rpc_init_authunix(void)
@@ -198,6 +215,7 @@ static
struct rpc_auth unix_auth = {
.au_cslack = UNX_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = REFCOUNT_INIT(1),
@@ -209,6 +227,8 @@ const struct rpc_credops unix_credops = {
.crdestroy = unx_destroy_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
};
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index ec451b8114b0..c47d82622fd1 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -235,7 +235,8 @@ out:
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
-static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
+static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
+ struct rpc_rqst *new)
{
struct rpc_rqst *req = NULL;
@@ -243,22 +244,20 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
if (atomic_read(&xprt->bc_free_slots) <= 0)
goto not_found;
if (list_empty(&xprt->bc_pa_list)) {
- req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
- if (!req)
+ if (!new)
goto not_found;
- list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
}
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
- req->rq_bytes_sent = 0;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
req->rq_xid = xid;
req->rq_connect_cookie = xprt->connect_cookie;
-not_found:
dprintk("RPC: backchannel req=%p\n", req);
+not_found:
return req;
}
@@ -321,18 +320,27 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
- struct rpc_rqst *req;
-
- spin_lock(&xprt->bc_pa_lock);
- list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
- if (req->rq_connect_cookie != xprt->connect_cookie)
- continue;
- if (req->rq_xid == xid)
- goto found;
- }
- req = xprt_alloc_bc_request(xprt, xid);
+ struct rpc_rqst *req, *new = NULL;
+
+ do {
+ spin_lock(&xprt->bc_pa_lock);
+ list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
+ if (req->rq_connect_cookie != xprt->connect_cookie)
+ continue;
+ if (req->rq_xid == xid)
+ goto found;
+ }
+ req = xprt_get_bc_request(xprt, xid, new);
found:
- spin_unlock(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
+ if (new) {
+ if (req != new)
+ xprt_free_bc_rqst(new);
+ break;
+ } else if (req)
+ break;
+ new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+ } while (new);
return req;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7ec6132c046..187d10443a15 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -66,20 +66,19 @@ static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static void call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static void call_status(struct rpc_task *task);
static void call_transmit_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
static void call_refreshresult(struct rpc_task *task);
-static void call_timeout(struct rpc_task *task);
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *task);
-static __be32 *rpc_encode_header(struct rpc_task *task);
-static __be32 *rpc_verify_header(struct rpc_task *task);
+static int rpc_encode_header(struct rpc_task *task,
+ struct xdr_stream *xdr);
+static int rpc_decode_header(struct rpc_task *task,
+ struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
+static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
{
@@ -834,9 +833,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
- if (RPC_IS_QUEUED(rovr))
- rpc_wake_up_queued_task(rovr->tk_waitqueue,
- rovr);
}
}
spin_unlock(&clnt->cl_lock);
@@ -1131,6 +1127,8 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
EXPORT_SYMBOL_GPL(rpc_call_async);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_encode(struct rpc_task *task);
+
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
@@ -1152,7 +1150,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
task = rpc_new_task(&task_setup_data);
xprt_init_bc_request(req, task);
- task->tk_action = call_bc_transmit;
+ task->tk_action = call_bc_encode;
atomic_inc(&task->tk_count);
WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
rpc_execute(task);
@@ -1162,6 +1160,29 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+/**
+ * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
+ * @req: RPC request to prepare
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ * @hdrsize: expected size of upper layer reply header, in XDR words
+ *
+ */
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int hdrsize)
+{
+ /* Subtract one to force an extra word of buffer space for the
+ * payload's XDR pad to fall into the rcv_buf's tail iovec.
+ */
+ hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
+
+ xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
+ trace_rpc_reply_pages(req);
+}
+EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
+
void
rpc_call_start(struct rpc_task *task)
{
@@ -1519,6 +1540,7 @@ call_start(struct rpc_task *task)
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
rpc_task_set_transport(task, clnt);
+ call_reserve(task);
}
/*
@@ -1532,6 +1554,9 @@ call_reserve(struct rpc_task *task)
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
+ if (rpc_task_need_resched(task))
+ return;
+ call_reserveresult(task);
}
static void call_retry_reserve(struct rpc_task *task);
@@ -1554,6 +1579,7 @@ call_reserveresult(struct rpc_task *task)
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_refresh;
+ call_refresh(task);
return;
}
@@ -1579,6 +1605,7 @@ call_reserveresult(struct rpc_task *task)
/* fall through */
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
+ call_retry_reserve(task);
return;
case -EIO: /* probably a shutdown */
break;
@@ -1601,6 +1628,9 @@ call_retry_reserve(struct rpc_task *task)
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
+ if (rpc_task_need_resched(task))
+ return;
+ call_reserveresult(task);
}
/*
@@ -1615,6 +1645,9 @@ call_refresh(struct rpc_task *task)
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
rpcauth_refreshcred(task);
+ if (rpc_task_need_resched(task))
+ return;
+ call_refreshresult(task);
}
/*
@@ -1633,6 +1666,7 @@ call_refreshresult(struct rpc_task *task)
case 0:
if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
+ call_allocate(task);
return;
}
/* Use rate-limiting and a max number of retries if refresh
@@ -1651,6 +1685,7 @@ call_refreshresult(struct rpc_task *task)
task->tk_cred_retry--;
dprintk("RPC: %5u %s: retry refresh creds\n",
task->tk_pid, __func__);
+ call_refresh(task);
return;
}
dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
@@ -1665,7 +1700,7 @@ call_refreshresult(struct rpc_task *task)
static void
call_allocate(struct rpc_task *task)
{
- unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
+ const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -1676,8 +1711,10 @@ call_allocate(struct rpc_task *task)
task->tk_status = 0;
task->tk_action = call_encode;
- if (req->rq_buffer)
+ if (req->rq_buffer) {
+ call_encode(task);
return;
+ }
if (proc->p_proc != 0) {
BUG_ON(proc->p_arglen == 0);
@@ -1690,15 +1727,25 @@ call_allocate(struct rpc_task *task)
* and reply headers, and convert both values
* to byte sizes.
*/
- req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+ req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
+ proc->p_arglen;
req->rq_callsize <<= 2;
- req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+ /*
+ * Note: the reply buffer must at minimum allocate enough space
+ * for the 'struct accepted_reply' from RFC5531.
+ */
+ req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + \
+ max_t(size_t, proc->p_replen, 2);
req->rq_rcvsize <<= 2;
status = xprt->ops->buf_alloc(task);
xprt_inject_disconnect(xprt);
- if (status == 0)
+ if (status == 0) {
+ if (rpc_task_need_resched(task))
+ return;
+ call_encode(task);
return;
+ }
if (status != -ENOMEM) {
rpc_exit(task, status);
return;
@@ -1728,10 +1775,7 @@ static void
rpc_xdr_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- kxdreproc_t encode;
- __be32 *p;
-
- dprint_status(task);
+ struct xdr_stream xdr;
xdr_buf_init(&req->rq_snd_buf,
req->rq_buffer,
@@ -1740,18 +1784,13 @@ rpc_xdr_encode(struct rpc_task *task)
req->rq_rbuffer,
req->rq_rcvsize);
- p = rpc_encode_header(task);
- if (p == NULL)
+ req->rq_snd_buf.head[0].iov_len = 0;
+ xdr_init_encode(&xdr, &req->rq_snd_buf,
+ req->rq_snd_buf.head[0].iov_base, req);
+ if (rpc_encode_header(task, &xdr))
return;
- encode = task->tk_msg.rpc_proc->p_encode;
- if (encode == NULL)
- return;
-
- task->tk_status = rpcauth_wrap_req(task, encode, req, p,
- task->tk_msg.rpc_argp);
- if (task->tk_status == 0)
- xprt_request_prepare(req);
+ task->tk_status = rpcauth_wrap_req(task, &xdr);
}
/*
@@ -1762,6 +1801,7 @@ call_encode(struct rpc_task *task)
{
if (!rpc_task_need_encode(task))
goto out;
+ dprint_status(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
rpc_xdr_encode(task);
/* Did the encode result in an error condition? */
@@ -1779,6 +1819,8 @@ call_encode(struct rpc_task *task)
rpc_exit(task, task->tk_status);
}
return;
+ } else {
+ xprt_request_prepare(task->tk_rqstp);
}
/* Add task to reply queue before transmission to avoid races */
@@ -1787,6 +1829,25 @@ call_encode(struct rpc_task *task)
xprt_request_enqueue_transmit(task);
out:
task->tk_action = call_bind;
+ call_bind(task);
+}
+
+/*
+ * Helpers to check if the task was already transmitted, and
+ * to take action when that is the case.
+ */
+static bool
+rpc_task_transmitted(struct rpc_task *task)
+{
+ return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+static void
+rpc_task_handle_transmitted(struct rpc_task *task)
+{
+ xprt_end_transmit(task);
+ task->tk_action = call_transmit_status;
+ call_transmit_status(task);
}
/*
@@ -1797,14 +1858,25 @@ call_bind(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
- dprint_status(task);
+ if (rpc_task_transmitted(task)) {
+ rpc_task_handle_transmitted(task);
+ return;
+ }
- task->tk_action = call_connect;
- if (!xprt_bound(xprt)) {
- task->tk_action = call_bind_status;
- task->tk_timeout = xprt->bind_timeout;
- xprt->ops->rpcbind(task);
+ if (xprt_bound(xprt)) {
+ task->tk_action = call_connect;
+ call_connect(task);
+ return;
}
+
+ dprint_status(task);
+
+ task->tk_action = call_bind_status;
+ if (!xprt_prepare_transmit(task))
+ return;
+
+ task->tk_timeout = xprt->bind_timeout;
+ xprt->ops->rpcbind(task);
}
/*
@@ -1815,10 +1887,16 @@ call_bind_status(struct rpc_task *task)
{
int status = -EIO;
+ if (rpc_task_transmitted(task)) {
+ rpc_task_handle_transmitted(task);
+ return;
+ }
+
if (task->tk_status >= 0) {
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_connect;
+ call_connect(task);
return;
}
@@ -1841,6 +1919,8 @@ call_bind_status(struct rpc_task *task)
task->tk_rebind_retry--;
rpc_delay(task, 3*HZ);
goto retry_timeout;
+ case -EAGAIN:
+ goto retry_timeout;
case -ETIMEDOUT:
dprintk("RPC: %5u rpcbind request timed out\n",
task->tk_pid);
@@ -1882,7 +1962,8 @@ call_bind_status(struct rpc_task *task)
retry_timeout:
task->tk_status = 0;
- task->tk_action = call_timeout;
+ task->tk_action = call_bind;
+ rpc_check_timeout(task);
}
/*
@@ -1893,21 +1974,31 @@ call_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+ if (rpc_task_transmitted(task)) {
+ rpc_task_handle_transmitted(task);
+ return;
+ }
+
+ if (xprt_connected(xprt)) {
+ task->tk_action = call_transmit;
+ call_transmit(task);
+ return;
+ }
+
dprintk("RPC: %5u call_connect xprt %p %s connected\n",
task->tk_pid, xprt,
(xprt_connected(xprt) ? "is" : "is not"));
- task->tk_action = call_transmit;
- if (!xprt_connected(xprt)) {
- task->tk_action = call_connect_status;
- if (task->tk_status < 0)
- return;
- if (task->tk_flags & RPC_TASK_NOCONNECT) {
- rpc_exit(task, -ENOTCONN);
- return;
- }
- xprt_connect(task);
+ task->tk_action = call_connect_status;
+ if (task->tk_status < 0)
+ return;
+ if (task->tk_flags & RPC_TASK_NOCONNECT) {
+ rpc_exit(task, -ENOTCONN);
+ return;
}
+ if (!xprt_prepare_transmit(task))
+ return;
+ xprt_connect(task);
}
/*
@@ -1919,10 +2010,8 @@ call_connect_status(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
- /* Check if the task was already transmitted */
- if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
- xprt_end_transmit(task);
- task->tk_action = call_transmit_status;
+ if (rpc_task_transmitted(task)) {
+ rpc_task_handle_transmitted(task);
return;
}
@@ -1937,8 +2026,7 @@ call_connect_status(struct rpc_task *task)
break;
if (clnt->cl_autobind) {
rpc_force_rebind(clnt);
- task->tk_action = call_bind;
- return;
+ goto out_retry;
}
/* fall through */
case -ECONNRESET:
@@ -1958,16 +2046,20 @@ call_connect_status(struct rpc_task *task)
/* fall through */
case -ENOTCONN:
case -EAGAIN:
- /* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
- task->tk_action = call_timeout;
- return;
+ goto out_retry;
case 0:
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
+ call_transmit(task);
return;
}
rpc_exit(task, status);
+ return;
+out_retry:
+ /* Check for timeouts before looping back to call_bind */
+ task->tk_action = call_bind;
+ rpc_check_timeout(task);
}
/*
@@ -1976,16 +2068,28 @@ call_connect_status(struct rpc_task *task)
static void
call_transmit(struct rpc_task *task)
{
+ if (rpc_task_transmitted(task)) {
+ rpc_task_handle_transmitted(task);
+ return;
+ }
+
dprint_status(task);
+ task->tk_action = call_transmit_status;
+ if (!xprt_prepare_transmit(task))
+ return;
task->tk_status = 0;
if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
- if (!xprt_prepare_transmit(task))
+ if (!xprt_connected(task->tk_xprt)) {
+ task->tk_status = -ENOTCONN;
return;
+ }
xprt_transmit(task);
}
- task->tk_action = call_transmit_status;
xprt_end_transmit(task);
+ if (rpc_task_need_resched(task))
+ return;
+ call_transmit_status(task);
}
/*
@@ -2000,8 +2104,12 @@ call_transmit_status(struct rpc_task *task)
* Common case: success. Force the compiler to put this
* test first.
*/
- if (task->tk_status == 0) {
- xprt_request_wait_receive(task);
+ if (rpc_task_transmitted(task)) {
+ if (task->tk_status == 0)
+ xprt_request_wait_receive(task);
+ if (rpc_task_need_resched(task))
+ return;
+ call_status(task);
return;
}
@@ -2038,7 +2146,7 @@ call_transmit_status(struct rpc_task *task)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
rpc_exit(task, task->tk_status);
- break;
+ return;
}
/* fall through */
case -ECONNRESET:
@@ -2046,11 +2154,25 @@ call_transmit_status(struct rpc_task *task)
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
+ task->tk_action = call_bind;
+ task->tk_status = 0;
break;
}
+ rpc_check_timeout(task);
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_transmit(struct rpc_task *task);
+static void call_bc_transmit_status(struct rpc_task *task);
+
+static void
+call_bc_encode(struct rpc_task *task)
+{
+ xprt_request_enqueue_transmit(task);
+ task->tk_action = call_bc_transmit;
+ call_bc_transmit(task);
+}
+
/*
* 5b. Send the backchannel RPC reply. On error, drop the reply. In
* addition, disconnect on connectivity errors.
@@ -2058,26 +2180,23 @@ call_transmit_status(struct rpc_task *task)
static void
call_bc_transmit(struct rpc_task *task)
{
- struct rpc_rqst *req = task->tk_rqstp;
-
- if (rpc_task_need_encode(task))
- xprt_request_enqueue_transmit(task);
- if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
- goto out_wakeup;
-
- if (!xprt_prepare_transmit(task))
- goto out_retry;
-
- if (task->tk_status < 0) {
- printk(KERN_NOTICE "RPC: Could not send backchannel reply "
- "error: %d\n", task->tk_status);
- goto out_done;
+ task->tk_action = call_bc_transmit_status;
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ if (!xprt_prepare_transmit(task))
+ return;
+ task->tk_status = 0;
+ xprt_transmit(task);
}
+ xprt_end_transmit(task);
+}
- xprt_transmit(task);
+static void
+call_bc_transmit_status(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
- xprt_end_transmit(task);
dprint_status(task);
+
switch (task->tk_status) {
case 0:
/* Success */
@@ -2091,8 +2210,14 @@ call_bc_transmit(struct rpc_task *task)
case -ENOTCONN:
case -EPIPE:
break;
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
+ /* fall through */
+ case -EBADSLT:
case -EAGAIN:
- goto out_retry;
+ task->tk_status = 0;
+ task->tk_action = call_bc_transmit;
+ return;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
@@ -2111,18 +2236,11 @@ call_bc_transmit(struct rpc_task *task)
* We were unable to reply and will have to drop the
* request. The server should reconnect and retransmit.
*/
- WARN_ON_ONCE(task->tk_status == -EAGAIN);
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
break;
}
-out_wakeup:
- rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
-out_done:
task->tk_action = rpc_exit_task;
- return;
-out_retry:
- task->tk_status = 0;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -2143,6 +2261,7 @@ call_status(struct rpc_task *task)
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
+ call_decode(task);
return;
}
@@ -2154,10 +2273,8 @@ call_status(struct rpc_task *task)
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
- if (RPC_IS_SOFTCONN(task)) {
- rpc_exit(task, status);
- break;
- }
+ if (RPC_IS_SOFTCONN(task))
+ goto out_exit;
/*
* Delay any retries for 3 seconds, then handle as if it
* were a timeout.
@@ -2165,7 +2282,6 @@ call_status(struct rpc_task *task)
rpc_delay(task, 3*HZ);
/* fall through */
case -ETIMEDOUT:
- task->tk_action = call_timeout;
break;
case -ECONNREFUSED:
case -ECONNRESET:
@@ -2178,42 +2294,48 @@ call_status(struct rpc_task *task)
case -EPIPE:
case -ENOTCONN:
case -EAGAIN:
- task->tk_action = call_encode;
break;
case -EIO:
/* shutdown or soft timeout */
- rpc_exit(task, status);
- break;
+ goto out_exit;
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
clnt->cl_program->name, -status);
- rpc_exit(task, status);
+ goto out_exit;
}
+ task->tk_action = call_encode;
+ rpc_check_timeout(task);
+ return;
+out_exit:
+ rpc_exit(task, status);
+}
+
+static bool
+rpc_check_connected(const struct rpc_rqst *req)
+{
+ /* No allocated request or transport? return true */
+ if (!req || !req->rq_xprt)
+ return true;
+ return xprt_connected(req->rq_xprt);
}
-/*
- * 6a. Handle RPC timeout
- * We do not release the request slot, so we keep using the
- * same XID for all retransmits.
- */
static void
-call_timeout(struct rpc_task *task)
+rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
- dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid);
- goto retry;
- }
+ if (xprt_adjust_timeout(task->tk_rqstp) == 0)
+ return;
dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
task->tk_timeouts++;
- if (RPC_IS_SOFTCONN(task)) {
+ if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
rpc_exit(task, -ETIMEDOUT);
return;
}
+
if (RPC_IS_SOFT(task)) {
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
@@ -2241,10 +2363,6 @@ call_timeout(struct rpc_task *task)
* event? RFC2203 requires the server to drop all such requests.
*/
rpcauth_invalcred(task);
-
-retry:
- task->tk_action = call_encode;
- task->tk_status = 0;
}
/*
@@ -2255,12 +2373,11 @@ call_decode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
- __be32 *p;
+ struct xdr_stream xdr;
dprint_status(task);
- if (!decode) {
+ if (!task->tk_msg.rpc_proc->p_decode) {
task->tk_action = rpc_exit_task;
return;
}
@@ -2285,223 +2402,186 @@ call_decode(struct rpc_task *task)
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
sizeof(req->rq_rcv_buf)) != 0);
- if (req->rq_rcv_buf.len < 12) {
- if (!RPC_IS_SOFT(task)) {
- task->tk_action = call_encode;
- goto out_retry;
- }
- dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
- clnt->cl_program->name, task->tk_status);
- task->tk_action = call_timeout;
- goto out_retry;
- }
-
- p = rpc_verify_header(task);
- if (IS_ERR(p)) {
- if (p == ERR_PTR(-EAGAIN))
- goto out_retry;
+ xdr_init_decode(&xdr, &req->rq_rcv_buf,
+ req->rq_rcv_buf.head[0].iov_base, req);
+ switch (rpc_decode_header(task, &xdr)) {
+ case 0:
+ task->tk_action = rpc_exit_task;
+ task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+ dprintk("RPC: %5u %s result %d\n",
+ task->tk_pid, __func__, task->tk_status);
return;
- }
- task->tk_action = rpc_exit_task;
-
- task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
- task->tk_msg.rpc_resp);
-
- dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
- task->tk_status);
- return;
-out_retry:
- task->tk_status = 0;
- /* Note: rpc_verify_header() may have freed the RPC slot */
- if (task->tk_rqstp == req) {
- xdr_free_bvec(&req->rq_rcv_buf);
- req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
- if (task->tk_client->cl_discrtry)
- xprt_conditional_disconnect(req->rq_xprt,
- req->rq_connect_cookie);
+ case -EAGAIN:
+ task->tk_status = 0;
+ /* Note: rpc_decode_header() may have freed the RPC slot */
+ if (task->tk_rqstp == req) {
+ xdr_free_bvec(&req->rq_rcv_buf);
+ req->rq_reply_bytes_recvd = 0;
+ req->rq_rcv_buf.len = 0;
+ if (task->tk_client->cl_discrtry)
+ xprt_conditional_disconnect(req->rq_xprt,
+ req->rq_connect_cookie);
+ }
+ task->tk_action = call_encode;
+ rpc_check_timeout(task);
}
}
-static __be32 *
-rpc_encode_header(struct rpc_task *task)
+static int
+rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- __be32 *p = req->rq_svec[0].iov_base;
-
- /* FIXME: check buffer size? */
-
- p = xprt_skip_transport_header(req->rq_xprt, p);
- *p++ = req->rq_xid; /* XID */
- *p++ = htonl(RPC_CALL); /* CALL */
- *p++ = htonl(RPC_VERSION); /* RPC version */
- *p++ = htonl(clnt->cl_prog); /* program number */
- *p++ = htonl(clnt->cl_vers); /* program version */
- *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */
- p = rpcauth_marshcred(task, p);
- if (p)
- req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
- return p;
+ __be32 *p;
+ int error;
+
+ error = -EMSGSIZE;
+ p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
+ if (!p)
+ goto out_fail;
+ *p++ = req->rq_xid;
+ *p++ = rpc_call;
+ *p++ = cpu_to_be32(RPC_VERSION);
+ *p++ = cpu_to_be32(clnt->cl_prog);
+ *p++ = cpu_to_be32(clnt->cl_vers);
+ *p = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
+
+ error = rpcauth_marshcred(task, xdr);
+ if (error < 0)
+ goto out_fail;
+ return 0;
+out_fail:
+ trace_rpc_bad_callhdr(task);
+ rpc_exit(task, error);
+ return error;
}
-static __be32 *
-rpc_verify_header(struct rpc_task *task)
+static noinline int
+rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
- struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
- int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
- __be32 *p = iov->iov_base;
- u32 n;
- int error = -EACCES;
-
- if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
- /* RFC-1014 says that the representation of XDR data must be a
- * multiple of four bytes
- * - if it isn't pointer subtraction in the NFS client may give
- * undefined results
- */
- dprintk("RPC: %5u %s: XDR representation not a multiple of"
- " 4 bytes: 0x%x\n", task->tk_pid, __func__,
- task->tk_rqstp->rq_rcv_buf.len);
- error = -EIO;
- goto out_err;
- }
- if ((len -= 3) < 0)
- goto out_overflow;
+ int error;
+ __be32 *p;
- p += 1; /* skip XID */
- if ((n = ntohl(*p++)) != RPC_REPLY) {
- dprintk("RPC: %5u %s: not an RPC reply: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- goto out_garbage;
- }
-
- if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
- if (--len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_AUTH_ERROR:
- break;
- case RPC_MISMATCH:
- dprintk("RPC: %5u %s: RPC call version mismatch!\n",
- task->tk_pid, __func__);
- error = -EPROTONOSUPPORT;
- goto out_err;
- default:
- dprintk("RPC: %5u %s: RPC call rejected, "
- "unknown error: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- goto out_err;
- }
- if (--len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_AUTH_REJECTEDCRED:
- case RPC_AUTH_REJECTEDVERF:
- case RPCSEC_GSS_CREDPROBLEM:
- case RPCSEC_GSS_CTXPROBLEM:
- if (!task->tk_cred_retry)
- break;
- task->tk_cred_retry--;
- dprintk("RPC: %5u %s: retry stale creds\n",
- task->tk_pid, __func__);
- rpcauth_invalcred(task);
- /* Ensure we obtain a new XID! */
- xprt_release(task);
- task->tk_action = call_reserve;
- goto out_retry;
- case RPC_AUTH_BADCRED:
- case RPC_AUTH_BADVERF:
- /* possibly garbled cred/verf? */
- if (!task->tk_garb_retry)
- break;
- task->tk_garb_retry--;
- dprintk("RPC: %5u %s: retry garbled creds\n",
- task->tk_pid, __func__);
- task->tk_action = call_encode;
- goto out_retry;
- case RPC_AUTH_TOOWEAK:
- printk(KERN_NOTICE "RPC: server %s requires stronger "
- "authentication.\n",
- task->tk_xprt->servername);
- break;
- default:
- dprintk("RPC: %5u %s: unknown auth error: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- }
- dprintk("RPC: %5u %s: call rejected %d\n",
- task->tk_pid, __func__, n);
- goto out_err;
- }
- p = rpcauth_checkverf(task, p);
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- dprintk("RPC: %5u %s: auth check failed with %d\n",
- task->tk_pid, __func__, error);
- goto out_garbage; /* bad verifier, retry */
- }
- len = p - (__be32 *)iov->iov_base - 1;
- if (len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_SUCCESS:
- return p;
- case RPC_PROG_UNAVAIL:
- dprintk("RPC: %5u %s: program %u is unsupported "
- "by server %s\n", task->tk_pid, __func__,
- (unsigned int)clnt->cl_prog,
- task->tk_xprt->servername);
+ /* RFC-1014 says that the representation of XDR data must be a
+ * multiple of four bytes
+ * - if it isn't pointer subtraction in the NFS client may give
+ * undefined results
+ */
+ if (task->tk_rqstp->rq_rcv_buf.len & 3)
+ goto out_unparsable;
+
+ p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto out_unparsable;
+ p++; /* skip XID */
+ if (*p++ != rpc_reply)
+ goto out_unparsable;
+ if (*p++ != rpc_msg_accepted)
+ goto out_msg_denied;
+
+ error = rpcauth_checkverf(task, xdr);
+ if (error)
+ goto out_verifier;
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
+ goto out_unparsable;
+ switch (*p) {
+ case rpc_success:
+ return 0;
+ case rpc_prog_unavail:
+ trace_rpc__prog_unavail(task);
error = -EPFNOSUPPORT;
goto out_err;
- case RPC_PROG_MISMATCH:
- dprintk("RPC: %5u %s: program %u, version %u unsupported "
- "by server %s\n", task->tk_pid, __func__,
- (unsigned int)clnt->cl_prog,
- (unsigned int)clnt->cl_vers,
- task->tk_xprt->servername);
+ case rpc_prog_mismatch:
+ trace_rpc__prog_mismatch(task);
error = -EPROTONOSUPPORT;
goto out_err;
- case RPC_PROC_UNAVAIL:
- dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
- "version %u on server %s\n",
- task->tk_pid, __func__,
- rpc_proc_name(task),
- clnt->cl_prog, clnt->cl_vers,
- task->tk_xprt->servername);
+ case rpc_proc_unavail:
+ trace_rpc__proc_unavail(task);
error = -EOPNOTSUPP;
goto out_err;
- case RPC_GARBAGE_ARGS:
- dprintk("RPC: %5u %s: server saw garbage\n",
- task->tk_pid, __func__);
- break; /* retry */
+ case rpc_garbage_args:
+ case rpc_system_err:
+ trace_rpc__garbage_args(task);
+ error = -EIO;
+ break;
default:
- dprintk("RPC: %5u %s: server accept status: %x\n",
- task->tk_pid, __func__, n);
- /* Also retry */
+ goto out_unparsable;
}
out_garbage:
clnt->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
- dprintk("RPC: %5u %s: retrying\n",
- task->tk_pid, __func__);
task->tk_action = call_encode;
-out_retry:
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
}
out_err:
rpc_exit(task, error);
- dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
- __func__, error);
- return ERR_PTR(error);
-out_overflow:
- dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
- __func__);
+ return error;
+
+out_unparsable:
+ trace_rpc__unparsable(task);
+ error = -EIO;
+ goto out_garbage;
+
+out_verifier:
+ trace_rpc_bad_verifier(task);
goto out_garbage;
+
+out_msg_denied:
+ error = -EACCES;
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
+ goto out_unparsable;
+ switch (*p++) {
+ case rpc_auth_error:
+ break;
+ case rpc_mismatch:
+ trace_rpc__mismatch(task);
+ error = -EPROTONOSUPPORT;
+ goto out_err;
+ default:
+ goto out_unparsable;
+ }
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
+ goto out_unparsable;
+ switch (*p++) {
+ case rpc_autherr_rejectedcred:
+ case rpc_autherr_rejectedverf:
+ case rpcsec_gsserr_credproblem:
+ case rpcsec_gsserr_ctxproblem:
+ if (!task->tk_cred_retry)
+ break;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ rpcauth_invalcred(task);
+ /* Ensure we obtain a new XID! */
+ xprt_release(task);
+ task->tk_action = call_reserve;
+ return -EAGAIN;
+ case rpc_autherr_badcred:
+ case rpc_autherr_badverf:
+ /* possibly garbled cred/verf? */
+ if (!task->tk_garb_retry)
+ break;
+ task->tk_garb_retry--;
+ trace_rpc__bad_creds(task);
+ task->tk_action = call_encode;
+ return -EAGAIN;
+ case rpc_autherr_tooweak:
+ trace_rpc__auth_tooweak(task);
+ pr_warn("RPC: server %s requires stronger authentication.\n",
+ task->tk_xprt->servername);
+ break;
+ default:
+ goto out_unparsable;
+ }
+ goto out_err;
}
static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index adc3c40cc733..28956c70100a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
+#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
@@ -784,8 +785,7 @@ void rpc_exit(struct rpc_task *task, int status)
{
task->tk_status = status;
task->tk_action = rpc_exit_task;
- if (RPC_IS_QUEUED(task))
- rpc_wake_up_queued_task(task->tk_waitqueue, task);
+ rpc_wake_up_queued_task(task->tk_waitqueue, task);
}
EXPORT_SYMBOL_GPL(rpc_exit);
@@ -902,7 +902,10 @@ void rpc_execute(struct rpc_task *task)
static void rpc_async_schedule(struct work_struct *work)
{
+ unsigned int pflags = memalloc_nofs_save();
+
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+ memalloc_nofs_restore(pflags);
}
/**
@@ -921,16 +924,13 @@ static void rpc_async_schedule(struct work_struct *work)
* Most requests are 'small' (under 2KiB) and can be serviced from a
* mempool, ensuring that NFS reads and writes can always proceed,
* and that there is good locality of reference for these buffers.
- *
- * In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we avoid using GFP_KERNEL.
*/
int rpc_malloc(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
+ gfp_t gfp = GFP_NOFS;
if (RPC_IS_SWAPPER(task))
gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -1011,7 +1011,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
static struct rpc_task *
rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}
/*
@@ -1067,7 +1067,10 @@ static void rpc_free_task(struct rpc_task *task)
static void rpc_async_release(struct work_struct *work)
{
+ unsigned int pflags = memalloc_nofs_save();
+
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
+ memalloc_nofs_restore(pflags);
}
static void rpc_release_resources_task(struct rpc_task *task)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e87ddb9f7feb..dbd19697ee38 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1145,17 +1145,6 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
#endif
/*
- * Setup response header for TCP, it has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
- struct kvec *resv = &rqstp->rq_res.head[0];
-
- /* tcp needs a space for the record length... */
- svc_putnl(resv, 0);
-}
-
-/*
* Common routine for processing the RPC request.
*/
static int
@@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
- /* Setup reply header */
- if (rqstp->rq_prot == IPPROTO_TCP)
- svc_tcp_prep_reply_hdr(rqstp);
-
svc_putu32(resv, rqstp->rq_xid);
vers = svc_getnl(argv);
@@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp)
goto out_drop;
}
+ /* Reserve space for the record marker */
+ if (rqstp->rq_prot == IPPROTO_TCP)
+ svc_putnl(resv, 0);
+
/* Returns 1 for send, 0 for drop */
if (likely(svc_process_common(rqstp, argv, resv)))
return svc_send(rqstp);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4eb8fbf2508d..61530b1b7754 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -357,15 +357,29 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp)
struct svc_xprt *xprt = rqstp->rq_xprt;
if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
atomic_dec(&xprt->xpt_nr_rqsts);
+ smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt);
}
}
-static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
+static bool svc_xprt_ready(struct svc_xprt *xprt)
{
- if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
+ unsigned long xpt_flags;
+
+ /*
+ * If another cpu has recently updated xpt_flags,
+ * sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
+ * know about it; otherwise it's possible that both that cpu and
+ * this one could call svc_xprt_enqueue() without either
+ * svc_xprt_enqueue() recognizing that the conditions below
+ * are satisfied, and we could stall indefinitely:
+ */
+ smp_rmb();
+ xpt_flags = READ_ONCE(xprt->xpt_flags);
+
+ if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true;
- if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
+ if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
svc_xprt_slots_in_range(xprt))
return true;
@@ -381,7 +395,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_rqst *rqstp = NULL;
int cpu;
- if (!svc_xprt_has_something_to_do(xprt))
+ if (!svc_xprt_ready(xprt))
return;
/* Mark transport as busy. It will remain in this state until
@@ -475,7 +489,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
-
+ smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt);
}
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a6a060925e5d..43590a968b73 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -349,12 +349,16 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
/*
* Set socket snd and rcv buffer lengths
*/
-static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
- unsigned int rcv)
+static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
{
+ unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
+ struct socket *sock = svsk->sk_sock;
+
+ nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
+
lock_sock(sock->sk);
- sock->sk->sk_sndbuf = snd * 2;
- sock->sk->sk_rcvbuf = rcv * 2;
+ sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
+ sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
}
@@ -516,9 +520,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* provides an upper bound on the number of threads
* which will access the socket.
*/
- svc_sock_setbufsize(svsk->sk_sock,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg);
+ svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL;
@@ -681,9 +683,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
* receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary
*/
- svc_sock_setbufsize(svsk->sk_sock,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
+ svc_sock_setbufsize(svsk, 3);
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f302c6eb8779..aa8177ddcbda 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -16,6 +16,7 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/bvec.h>
+#include <trace/events/sunrpc.h>
/*
* XDR functions for basic NFS types
@@ -162,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf)
buf->bvec = NULL;
}
+/**
+ * xdr_inline_pages - Prepare receive buffer for a large reply
+ * @xdr: xdr_buf into which reply will be placed
+ * @offset: expected offset where data payload will start, in bytes
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ *
+ */
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
@@ -179,6 +189,8 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
tail->iov_base = buf + offset;
tail->iov_len = buflen - offset;
+ if ((xdr->page_len & 3) == 0)
+ tail->iov_len -= sizeof(__be32);
xdr->buflen += len;
}
@@ -346,13 +358,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages);
* 'len' bytes. The extra data is not lost, but is instead
* moved into the inlined pages and/or the tail.
*/
-static void
+static unsigned int
xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
{
struct kvec *head, *tail;
size_t copy, offs;
unsigned int pglen = buf->page_len;
+ unsigned int result;
+ result = 0;
tail = buf->tail;
head = buf->head;
@@ -366,6 +380,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
copy = tail->iov_len - len;
memmove((char *)tail->iov_base + len,
tail->iov_base, copy);
+ result += copy;
}
/* Copy from the inlined pages into the tail */
copy = len;
@@ -376,11 +391,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
copy = 0;
else if (copy > tail->iov_len - offs)
copy = tail->iov_len - offs;
- if (copy != 0)
+ if (copy != 0) {
_copy_from_pages((char *)tail->iov_base + offs,
buf->pages,
buf->page_base + pglen + offs - len,
copy);
+ result += copy;
+ }
/* Do we also need to copy data from the head into the tail ? */
if (len > pglen) {
offs = copy = len - pglen;
@@ -390,6 +407,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
(char *)head->iov_base +
head->iov_len - offs,
copy);
+ result += copy;
}
}
/* Now handle pages */
@@ -405,12 +423,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
_copy_to_pages(buf->pages, buf->page_base,
(char *)head->iov_base + head->iov_len - len,
copy);
+ result += copy;
}
head->iov_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
+
+ return result;
}
/**
@@ -422,14 +443,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
* 'len' bytes. The extra data is not lost, but is instead
* moved into the tail.
*/
-static void
+static unsigned int
xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
{
struct kvec *tail;
size_t copy;
unsigned int pglen = buf->page_len;
unsigned int tailbuf_len;
+ unsigned int result;
+ result = 0;
tail = buf->tail;
BUG_ON (len > pglen);
@@ -447,18 +470,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
if (tail->iov_len > len) {
char *p = (char *)tail->iov_base + len;
memmove(p, tail->iov_base, tail->iov_len - len);
+ result += tail->iov_len - len;
} else
copy = tail->iov_len;
/* Copy from the inlined pages into the tail */
_copy_from_pages((char *)tail->iov_base,
buf->pages, buf->page_base + pglen - len,
copy);
+ result += copy;
}
buf->page_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
+
+ return result;
}
void
@@ -483,6 +510,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
* @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
*
* Note: at the moment the RPC client only passes the length of our
* scratch buffer in the xdr_buf's header kvec. Previously this
@@ -491,7 +519,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
* of the buffer length, and takes care of adjusting the kvec
* length for us.
*/
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct rpc_rqst *rqst)
{
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -513,6 +542,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
buf->len += len;
iov->iov_len += len;
}
+ xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_encode);
@@ -551,9 +581,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
int frag1bytes, frag2bytes;
if (nbytes > PAGE_SIZE)
- return NULL; /* Bigger buffers require special handling */
+ goto out_overflow; /* Bigger buffers require special handling */
if (xdr->buf->len + nbytes > xdr->buf->buflen)
- return NULL; /* Sorry, we're totally out of space */
+ goto out_overflow; /* Sorry, we're totally out of space */
frag1bytes = (xdr->end - xdr->p) << 2;
frag2bytes = nbytes - frag1bytes;
if (xdr->iov)
@@ -582,6 +612,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}
/**
@@ -819,8 +852,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data
* @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
*/
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct rpc_rqst *rqst)
{
xdr->buf = buf;
xdr->scratch.iov_base = NULL;
@@ -836,6 +871,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->nwords -= p - xdr->p;
xdr->p = p;
}
+ xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
@@ -854,7 +890,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
buf->page_len = len;
buf->buflen = len;
buf->len = len;
- xdr_init_decode(xdr, buf, NULL);
+ xdr_init_decode(xdr, buf, NULL, NULL);
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
@@ -896,20 +932,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
size_t cplen = (char *)xdr->end - (char *)xdr->p;
if (nbytes > xdr->scratch.iov_len)
- return NULL;
+ goto out_overflow;
p = __xdr_inline_decode(xdr, cplen);
if (p == NULL)
return NULL;
memcpy(cpdest, p, cplen);
+ if (!xdr_set_next_buffer(xdr))
+ goto out_overflow;
cpdest += cplen;
nbytes -= cplen;
- if (!xdr_set_next_buffer(xdr))
- return NULL;
p = __xdr_inline_decode(xdr, nbytes);
if (p == NULL)
return NULL;
memcpy(cpdest, p, nbytes);
return xdr->scratch.iov_base;
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}
/**
@@ -926,14 +965,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
- if (nbytes == 0)
+ if (unlikely(nbytes == 0))
return xdr->p;
if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
- return NULL;
+ goto out_overflow;
p = __xdr_inline_decode(xdr, nbytes);
if (p != NULL)
return p;
return xdr_copy_to_scratch(xdr, nbytes);
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
@@ -943,13 +985,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
struct kvec *iov;
unsigned int nwords = XDR_QUADLEN(len);
unsigned int cur = xdr_stream_pos(xdr);
+ unsigned int copied, offset;
if (xdr->nwords == 0)
return 0;
+
/* Realign pages to current pointer position */
- iov = buf->head;
+ iov = buf->head;
if (iov->iov_len > cur) {
- xdr_shrink_bufhead(buf, iov->iov_len - cur);
+ offset = iov->iov_len - cur;
+ copied = xdr_shrink_bufhead(buf, offset);
+ trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
@@ -961,7 +1007,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
len = buf->page_len;
else if (nwords < xdr->nwords) {
/* Truncate page data and move it into the tail */
- xdr_shrink_pagelen(buf, buf->page_len - len);
+ offset = buf->page_len - len;
+ copied = xdr_shrink_pagelen(buf, offset);
+ trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
return len;
@@ -1102,47 +1150,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
}
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
-/**
- * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
- * @buf: buf to be trimmed
- * @len: number of bytes to reduce "buf" by
- *
- * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
- * that it's possible that we'll trim less than that amount if the xdr_buf is
- * too small, or if (for instance) it's all in the head and the parser has
- * already read too far into it.
- */
-void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
-{
- size_t cur;
- unsigned int trim = len;
-
- if (buf->tail[0].iov_len) {
- cur = min_t(size_t, buf->tail[0].iov_len, trim);
- buf->tail[0].iov_len -= cur;
- trim -= cur;
- if (!trim)
- goto fix_len;
- }
-
- if (buf->page_len) {
- cur = min_t(unsigned int, buf->page_len, trim);
- buf->page_len -= cur;
- trim -= cur;
- if (!trim)
- goto fix_len;
- }
-
- if (buf->head[0].iov_len) {
- cur = min_t(size_t, buf->head[0].iov_len, trim);
- buf->head[0].iov_len -= cur;
- trim -= cur;
- }
-fix_len:
- buf->len -= (len - trim);
-}
-EXPORT_SYMBOL_GPL(xdr_buf_trim);
-
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
unsigned int this_len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f1ec2110efeb..d7117d241460 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -49,6 +49,7 @@
#include <linux/sunrpc/metrics.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/rcupdate.h>
+#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work)
{
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
+ unsigned int pflags = memalloc_nofs_save();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
wake_up_bit(&xprt->state, XPRT_LOCKED);
+ memalloc_nofs_restore(pflags);
}
/**
@@ -661,7 +664,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
xprt_clear_write_space_locked(xprt);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -1165,6 +1168,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 1);
goto out;
}
} else if (RPC_IS_SWAPPER(task)) {
@@ -1176,6 +1180,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 2);
goto out;
}
} else if (!req->rq_seqno) {
@@ -1184,11 +1189,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
continue;
list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
INIT_LIST_HEAD(&req->rq_xmit);
+ trace_xprt_enq_xmit(task, 3);
goto out;
}
}
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 4);
out:
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@@ -1313,8 +1320,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
int is_retrans = RPC_WAS_SENT(task);
int status;
- dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
-
if (!req->rq_bytes_sent) {
if (xprt_request_data_received(task)) {
status = 0;
@@ -1325,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
status = -EBADMSG;
goto out_dequeue;
}
+ if (task->tk_ops->rpc_call_prepare_transmit) {
+ task->tk_ops->rpc_call_prepare_transmit(task,
+ task->tk_calldata);
+ status = task->tk_status;
+ if (status < 0)
+ goto out_dequeue;
+ }
}
/*
@@ -1336,9 +1348,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(req);
- trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
req->rq_ntrans--;
+ trace_xprt_transmit(req, status);
return status;
}
@@ -1347,7 +1359,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
xprt_inject_disconnect(xprt);
- dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
spin_lock_bh(&xprt->transport_lock);
@@ -1360,6 +1371,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
req->rq_connect_cookie = connect_cookie;
out_dequeue:
+ trace_xprt_transmit(req, status);
xprt_request_dequeue_transmit(task);
rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
return status;
@@ -1599,7 +1611,6 @@ xprt_request_init(struct rpc_task *task)
req->rq_buffer = NULL;
req->rq_xid = xprt_alloc_xid(xprt);
xprt_init_connect_cookie(req, xprt);
- req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0;
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
@@ -1721,6 +1732,7 @@ void xprt_release(struct rpc_task *task)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
+ xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
@@ -1749,7 +1761,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
- req->rq_bytes_sent = 0;
}
#endif
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 0de9b3e63770..d79b18c1f4cd 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base);
+ req->rl_rdmabuf->rg_base, rqst);
p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
@@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
- rqst->rq_bytes_sent = 0;
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 6a561056b538..52cb6c1b0c2b 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -391,7 +391,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, u32 xid,
+ int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
goto out_mapmr_err;
ibmr->iova &= 0x00000000ffffffff;
- ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
+ ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d18614e02b4e..6c1fb270f127 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}
+/* The client is required to provide a Reply chunk if the maximum
+ * size of the non-payload part of the RPC Reply is larger than
+ * the inline threshold.
+ */
+static bool
+rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
+ const struct rpc_rqst *rqst)
+{
+ const struct xdr_buf *buf = &rqst->rq_rcv_buf;
+ const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+ return buf->head[0].iov_len + buf->tail[0].iov_len <
+ ia->ri_max_inline_read;
+}
+
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
* a byte range. Other modes coalesce these SGEs into a single MR
* when they can.
@@ -733,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base);
+ req->rl_rdmabuf->rg_base, rqst);
/* Fixed header fields */
ret = -EMSGSIZE;
@@ -762,7 +777,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
*/
if (rpcrdma_results_inline(r_xprt, rqst))
wtype = rpcrdma_noch;
- else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ)
+ else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
+ rpcrdma_nonpayload_inline(r_xprt, rqst))
wtype = rpcrdma_writech;
else
wtype = rpcrdma_replych;
@@ -1313,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
- rep->rr_hdrbuf.head[0].iov_base);
+ rep->rr_hdrbuf.head[0].iov_base, NULL);
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index b908f2ca08fd..907464c2a9f0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
- xprt->tsh_size = 0;
xprt->ops = &xprt_rdma_bc_procs;
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 828b149eaaef..65e2fb9aac65 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -272,11 +272,8 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
return false;
ctxt->rc_temp = true;
ret = __svc_rdma_post_recv(rdma, ctxt);
- if (ret) {
- pr_err("svcrdma: failure posting recv buffers: %d\n",
- ret);
+ if (ret)
return false;
- }
}
return true;
}
@@ -314,17 +311,14 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
- spin_unlock(&rdma->sc_rq_dto_lock);
+ /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+ spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
goto out;
flushed:
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index dc1951759a8e..2121c9b4d275 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -64,8 +64,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
spin_unlock(&rdma->sc_rw_ctxt_lock);
} else {
spin_unlock(&rdma->sc_rw_ctxt_lock);
- ctxt = kmalloc(sizeof(*ctxt) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist),
+ ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out;
@@ -213,13 +212,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (unlikely(wc->status != IB_WC_SUCCESS))
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
- }
svc_rdma_write_info_free(info);
}
@@ -278,18 +272,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else {
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q);
+ /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
+ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock);
- set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1f200119268c..6fdba72f89f4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -272,10 +272,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: Send: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
}
svc_xprt_put(&rdma->sc_xprt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 57f86c63a463..027a3b07d329 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -390,8 +390,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
unsigned int ctxts, rq_depth;
struct ib_device *dev;
- struct sockaddr *sap;
int ret = 0;
+ RPC_IFDEBUG(struct sockaddr *sap);
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -525,6 +525,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (ret)
goto errout;
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
@@ -535,6 +536,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", conn_param.initiator_depth);
+#endif
trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
@@ -588,11 +590,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
- /* We should only be called from kref_put */
- if (kref_read(&xprt->xpt_ref) != 0)
- pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- kref_read(&xprt->xpt_ref));
-
svc_rdma_flush_recv_queues(rdma);
/* Final put of backchannel client transport */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index fbc171ebfe91..5d261353bd90 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
- xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->ops = &xprt_rdma_procs;
/*
@@ -738,7 +737,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
- rqst->rq_bytes_sent = 0;
/* An RPC with no reply will throw off credit accounting,
* so drop the connection to reset the credit grant.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 21113bfd4eca..89a63391d4d4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1481,6 +1481,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (ep->rep_receive_count > needed)
goto out;
needed -= ep->rep_receive_count;
+ if (!temp)
+ needed += RPCRDMA_MAX_RECV_BATCH;
count = 0;
wr = NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5a18472f2c9c..10f6593e1a6a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -205,6 +205,16 @@ struct rpcrdma_rep {
struct ib_recv_wr rr_recv_wr;
};
+/* To reduce the rate at which a transport invokes ib_post_recv
+ * (and thus the hardware doorbell rate), xprtrdma posts Receive
+ * WRs in batches.
+ *
+ * Setting this to zero disables Receive post batching.
+ */
+enum {
+ RPCRDMA_MAX_RECV_BATCH = 7,
+};
+
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
*/
struct rpcrdma_req;
@@ -577,7 +587,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr);
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, u32 xid,
+ int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7754aa3e434f..732d4b57411a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -50,6 +50,7 @@
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
+#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
size_t want, seek_init = seek, offset = 0;
ssize_t ret;
- if (seek < buf->head[0].iov_len) {
- want = min_t(size_t, count, buf->head[0].iov_len);
+ want = min_t(size_t, count, buf->head[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -416,13 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
goto out;
seek = 0;
} else {
- seek -= buf->head[0].iov_len;
- offset += buf->head[0].iov_len;
+ seek -= want;
+ offset += want;
}
want = xs_alloc_sparse_pages(buf,
min_t(size_t, count - offset, buf->page_len),
- GFP_NOWAIT);
+ GFP_KERNEL);
if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
@@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
- if (seek < buf->tail[0].iov_len) {
- want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -452,8 +453,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
goto out;
if (ret != want)
goto out;
- } else
- offset += buf->tail[0].iov_len;
+ } else if (offset < seek_init)
+ offset = seek_init;
ret = -EMSGSIZE;
out:
*read = offset - seek_init;
@@ -481,28 +482,40 @@ xs_read_stream_request_done(struct sock_xprt *transport)
return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
}
+static void
+xs_read_stream_check_eor(struct sock_xprt *transport,
+ struct msghdr *msg)
+{
+ if (xs_read_stream_request_done(transport))
+ msg->msg_flags |= MSG_EOR;
+}
+
static ssize_t
xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
{
struct xdr_buf *buf = &req->rq_private_buf;
- size_t want, read;
- ssize_t ret;
+ size_t want, uninitialized_var(read);
+ ssize_t uninitialized_var(ret);
xs_read_header(transport, buf);
want = transport->recv.len - transport->recv.offset;
- ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
- transport->recv.copied + want, transport->recv.copied,
- &read);
- transport->recv.offset += read;
- transport->recv.copied += read;
- if (transport->recv.offset == transport->recv.len) {
- if (xs_read_stream_request_done(transport))
- msg->msg_flags |= MSG_EOR;
- return read;
+ if (want != 0) {
+ ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+ transport->recv.copied + want,
+ transport->recv.copied,
+ &read);
+ transport->recv.offset += read;
+ transport->recv.copied += read;
}
+ if (transport->recv.offset == transport->recv.len)
+ xs_read_stream_check_eor(transport, msg);
+
+ if (want == 0)
+ return 0;
+
switch (ret) {
default:
break;
@@ -655,13 +668,35 @@ out_err:
return ret != 0 ? ret : -ESHUTDOWN;
}
+static __poll_t xs_poll_socket(struct sock_xprt *transport)
+{
+ return transport->sock->ops->poll(transport->file, transport->sock,
+ NULL);
+}
+
+static bool xs_poll_socket_readable(struct sock_xprt *transport)
+{
+ __poll_t events = xs_poll_socket(transport);
+
+ return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
+}
+
+static void xs_poll_check_readable(struct sock_xprt *transport)
+{
+
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (!xs_poll_socket_readable(transport))
+ return;
+ if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ queue_work(xprtiod_workqueue, &transport->recv_worker);
+}
+
static void xs_stream_data_receive(struct sock_xprt *transport)
{
size_t read = 0;
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
for (;;) {
@@ -671,6 +706,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
read += ret;
cond_resched();
}
+ if (ret == -ESHUTDOWN)
+ kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_stream_read_data(&transport->xprt, ret, read);
@@ -680,7 +719,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_stream_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
static void
@@ -690,65 +732,65 @@ xs_stream_reset_connect(struct sock_xprt *transport)
transport->recv.len = 0;
transport->recv.copied = 0;
transport->xmit.offset = 0;
+}
+
+static void
+xs_stream_start_connect(struct sock_xprt *transport)
+{
transport->xprt.stat.connect_count++;
transport->xprt.stat.connect_start = jiffies;
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
-static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
+static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
{
- struct msghdr msg = {
- .msg_name = addr,
- .msg_namelen = addrlen,
- .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
- };
- struct kvec iov = {
- .iov_base = vec->iov_base + base,
- .iov_len = vec->iov_len - base,
- };
+ if (seek)
+ iov_iter_advance(&msg->msg_iter, seek);
+ return sock_sendmsg(sock, msg);
+}
- if (iov.iov_len != 0)
- return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
- return kernel_sendmsg(sock, &msg, NULL, 0, 0);
+static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
+{
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+ return xs_sendmsg(sock, msg, seek);
}
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
+static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
{
- ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
- int offset, size_t size, int flags);
- struct page **ppage;
- unsigned int remainder;
int err;
- remainder = xdr->page_len - base;
- base += xdr->page_base;
- ppage = xdr->pages + (base >> PAGE_SHIFT);
- base &= ~PAGE_MASK;
- do_sendpage = sock->ops->sendpage;
- if (!zerocopy)
- do_sendpage = sock_no_sendpage;
- for(;;) {
- unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
- int flags = XS_SENDMSG_FLAGS;
+ err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (err < 0)
+ return err;
- remainder -= len;
- if (more)
- flags |= MSG_MORE;
- if (remainder != 0)
- flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
- err = do_sendpage(sock, *ppage, base, len, flags);
- if (remainder == 0 || err != len)
- break;
- *sent_p += err;
- ppage++;
- base = 0;
- }
- if (err > 0) {
- *sent_p += err;
- err = 0;
- }
- return err;
+ iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+ xdr_buf_pagecount(xdr),
+ xdr->page_len + xdr->page_base);
+ return xs_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+#define xs_record_marker_len() sizeof(rpc_fraghdr)
+
+/* Common case:
+ * - stream transport
+ * - sending from byte 0 of the message
+ * - the message is wholly contained in @xdr's head iovec
+ */
+static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+ rpc_fraghdr marker, struct kvec *vec, size_t base)
+{
+ struct kvec iov[2] = {
+ [0] = {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker)
+ },
+ [1] = *vec,
+ };
+ size_t len = iov[0].iov_len + iov[1].iov_len;
+
+ iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+ return xs_sendmsg(sock, msg, base);
}
/**
@@ -758,49 +800,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
- * @zerocopy: true if it is safe to use sendpage()
+ * @rm: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
{
- unsigned int remainder = xdr->len - base;
+ struct msghdr msg = {
+ .msg_name = addr,
+ .msg_namelen = addrlen,
+ .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
+ };
+ unsigned int rmsize = rm ? sizeof(rm) : 0;
+ unsigned int remainder = rmsize + xdr->len - base;
+ unsigned int want;
int err = 0;
- int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
- if (base != 0) {
- addr = NULL;
- addrlen = 0;
- }
-
- if (base < xdr->head[0].iov_len || addr != NULL) {
- unsigned int len = xdr->head[0].iov_len - base;
+ want = xdr->head[0].iov_len + rmsize;
+ if (base < want) {
+ unsigned int len = want - base;
remainder -= len;
- err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ if (rmsize)
+ err = xs_send_rm_and_kvec(sock, &msg, rm,
+ &xdr->head[0], base);
+ else
+ err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else
- base -= xdr->head[0].iov_len;
+ base -= want;
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
- *sent_p += sent;
- if (remainder == 0 || sent != len)
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_pagedata(sock, &msg, xdr, base);
+ if (remainder == 0 || err != len)
goto out;
+ *sent_p += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
return 0;
- err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
out:
if (err > 0) {
*sent_p += err;
@@ -856,7 +909,7 @@ static int xs_nospace(struct rpc_rqst *req)
static void
xs_stream_prepare_request(struct rpc_rqst *req)
{
- req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+ req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
}
/*
@@ -870,13 +923,14 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
}
/*
- * Construct a stream transport record marker in @buf.
+ * Return the stream record marker field for a record of length < 2^31-1
*/
-static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
+static rpc_fraghdr
+xs_stream_record_marker(struct xdr_buf *xdr)
{
- u32 reclen = buf->len - sizeof(rpc_fraghdr);
- rpc_fraghdr *base = buf->head[0].iov_base;
- *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
+ if (!xdr->len)
+ return 0;
+ return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
}
/**
@@ -905,15 +959,14 @@ static int xs_local_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}
- xs_encode_stream_record_marker(&req->rq_snd_buf);
-
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- true, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
@@ -925,7 +978,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -981,7 +1033,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
- xdr, 0, true, &sent);
+ xdr, 0, 0, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len, status);
@@ -1045,7 +1097,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
- bool zerocopy = true;
bool vm_wait = false;
int status;
int sent;
@@ -1057,17 +1108,9 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}
- xs_encode_stream_record_marker(&req->rq_snd_buf);
-
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
- /* Don't use zero copy if this is a resend. If the RPC call
- * completes while the socket holds a reference to the pages,
- * then we may end up resending corrupted data.
- */
- if (req->rq_task->tk_flags & RPC_TASK_SENT)
- zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
@@ -1080,7 +1123,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- zerocopy, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
@@ -1091,7 +1135,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -1211,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
struct rpc_xprt *xprt = &transport->xprt;
+ struct file *filp = transport->file;
if (sk == NULL)
return;
@@ -1224,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
+ transport->file = NULL;
sk->sk_user_data = NULL;
@@ -1231,10 +1276,12 @@ static void xs_reset_transport(struct sock_xprt *transport)
xprt_clear_connected(xprt);
write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
+ /* Reset stream record info */
+ xs_stream_reset_connect(transport);
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
- sock_release(sock);
+ fput(filp);
xprt_disconnect_done(xprt);
}
@@ -1358,7 +1405,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
@@ -1370,6 +1416,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
consume_skb(skb);
cond_resched();
}
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
}
@@ -1378,7 +1425,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_udp_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
/**
@@ -1826,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type,
int protocol, bool reuseport)
{
+ struct file *filp;
struct socket *sock;
int err;
@@ -1846,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ if (IS_ERR(filp))
+ return ERR_CAST(filp);
+ transport->file = filp;
+
return sock;
out:
return ERR_PTR(err);
@@ -1869,7 +1925,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
xprt_clear_connected(xprt);
@@ -1880,7 +1935,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
write_unlock_bh(&sk->sk_callback_lock);
}
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
@@ -1892,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
static int xs_local_setup_socket(struct sock_xprt *transport)
{
struct rpc_xprt *xprt = &transport->xprt;
+ struct file *filp;
struct socket *sock;
int status = -EIO;
@@ -1904,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
}
xs_reclassify_socket(AF_LOCAL, sock);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ if (IS_ERR(filp)) {
+ status = PTR_ERR(filp);
+ goto out;
+ }
+ transport->file = filp;
+
dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
@@ -2057,7 +2120,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
- sk->sk_allocation = GFP_NOIO;
xprt_set_connected(xprt);
@@ -2220,7 +2282,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_write_space = xs_tcp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
@@ -2240,8 +2301,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
- /* Reset TCP record info */
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
@@ -2534,26 +2594,35 @@ static int bc_sendto(struct rpc_rqst *req)
{
int len;
struct xdr_buf *xbufp = &req->rq_snd_buf;
- struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct socket *sock = transport->sock;
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
unsigned long headoff;
unsigned long tailoff;
+ struct page *tailpage;
+ struct msghdr msg = {
+ .msg_flags = MSG_MORE
+ };
+ rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+ (u32)xbufp->len);
+ struct kvec iov = {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker),
+ };
- xs_encode_stream_record_marker(xbufp);
+ len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
+ if (len != iov.iov_len)
+ return -EAGAIN;
+ tailpage = NULL;
+ if (xbufp->tail[0].iov_len)
+ tailpage = virt_to_page(xbufp->tail[0].iov_base);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
- len = svc_send_common(sock, xbufp,
+ len = svc_send_common(transport->sock, xbufp,
virt_to_page(xbufp->head[0].iov_base), headoff,
- xbufp->tail[0].iov_base, tailoff);
-
- if (len != xbufp->len) {
- printk(KERN_NOTICE "Error sending entire callback!\n");
- len = -EAGAIN;
- }
-
+ tailpage, tailoff);
+ if (len != xbufp->len)
+ return -EAGAIN;
return len;
}
@@ -2793,7 +2862,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = 0;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@@ -2862,7 +2930,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_UDP;
- xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
@@ -2942,7 +3009,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@@ -3015,7 +3081,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->timeout = &xs_tcp_default_timeout;
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 06fee142f09f..63f39201e41e 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -919,6 +919,9 @@ int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
{
struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+ if (!group)
+ return -EMSGSIZE;
+
if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
grp->type) ||
nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f076edb74338..7ce1e86b024f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr)
void tipc_net_stop(struct net *net)
{
- u32 self = tipc_own_addr(net);
-
- if (!self)
+ if (!tipc_own_id(net))
return;
- tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2dc4919ab23c..dd3b6dc17662 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -817,10 +817,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
struct tipc_link_entry *le = &n->links[bearer_id];
+ struct tipc_media_addr *maddr = NULL;
struct tipc_link *l = le->link;
- struct tipc_media_addr *maddr;
- struct sk_buff_head xmitq;
int old_bearer_id = bearer_id;
+ struct sk_buff_head xmitq;
if (!l)
return;
@@ -844,7 +844,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_node_write_unlock(n);
if (delete)
tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e482b342bfa8..b542f14ed444 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1333,7 +1333,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(!dest)) {
dest = &tsk->peer;
- if (!syn || dest->family != AF_TIPC)
+ if (!syn && dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
@@ -2349,6 +2349,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return 0;
}
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
+{
+ if (addr->family != AF_TIPC)
+ return false;
+ if (addr->addrtype == TIPC_SERVICE_RANGE)
+ return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+ return (addr->addrtype == TIPC_SERVICE_ADDR ||
+ addr->addrtype == TIPC_SOCKET_ADDR);
+}
+
/**
* tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
@@ -2384,18 +2394,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if (!tipc_sk_type_connectionless(sk))
res = -EINVAL;
goto exit;
- } else if (dst->family != AF_TIPC) {
- res = -EINVAL;
}
- if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ if (!tipc_sockaddr_is_sane(dst)) {
res = -EINVAL;
- if (res)
goto exit;
-
+ }
/* DGRAM/RDM connect(), just save the destaddr */
if (tipc_sk_type_connectionless(sk)) {
memcpy(&tsk->peer, dest, destlen);
goto exit;
+ } else if (dst->addrtype == TIPC_SERVICE_RANGE) {
+ res = -EINVAL;
+ goto exit;
}
previous = sk->sk_state;
@@ -3255,6 +3265,8 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
peer_port = tsk_peer_port(tsk);
nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);
+ if (!nest)
+ return -EMSGSIZE;
if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
goto msg_full;
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 4a708a4e8583..b45932d78004 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -363,6 +363,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
struct tipc_subscription *sub;
if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 4a1da837a733..135a7ee9db03 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -558,9 +558,6 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
MSG_DONTWAIT | MSG_NOSIGNAL);
sk->sk_allocation = sk_allocation;
}
-
- if (!rc)
- ctx->sk_write_space(sk);
}
void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 17e8667917aa..df921a2904b9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -146,7 +146,6 @@ retry:
}
ctx->in_tcp_sendpages = false;
- ctx->sk_write_space(sk);
return 0;
}
@@ -228,6 +227,8 @@ static void tls_write_space(struct sock *sk)
else
#endif
tls_sw_write_space(sk, ctx);
+
+ ctx->sk_write_space(sk);
}
static void tls_ctx_free(struct tls_context *ctx)
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 8b31ab85d050..3b9e450656a4 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,6 +19,11 @@ config UNIX
Say Y unless you know what you are doing.
+config UNIX_SCM
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index ffd0a275c3a7..54e58cc4f945 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
+
+obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a95d479caeea..ddb838a1b74c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -119,6 +119,8 @@
#include <linux/freezer.h>
#include <linux/file.h>
+#include "scm.h"
+
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -1496,67 +1498,6 @@ out:
return err;
}
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- /*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
-}
-
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e72844..8bbe1b8e4ff7 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,77 +86,13 @@
#include <net/scm.h>
#include <net/tcp_states.h>
+#include "scm.h"
+
/* Internal data structures and random procedures: */
-static LIST_HEAD(gc_inflight_list);
static LIST_HEAD(gc_candidates);
-static DEFINE_SPINLOCK(unix_gc_lock);
static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
-unsigned int unix_tot_inflight;
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
- u_sock = s;
- }
- return u_sock;
-}
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (atomic_long_inc_return(&u->inflight) == 1) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- unix_tot_inflight++;
- }
- user->unix_inflight++;
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!atomic_long_read(&u->inflight));
- BUG_ON(list_empty(&u->link));
-
- if (atomic_long_dec_and_test(&u->inflight))
- list_del_init(&u->link);
- unix_tot_inflight--;
- }
- user->unix_inflight--;
- spin_unlock(&unix_gc_lock);
-}
-
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
{
diff --git a/net/unix/scm.c b/net/unix/scm.c
new file mode 100644
index 000000000000..8c40f2b32392
--- /dev/null
+++ b/net/unix/scm.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+#include <linux/init.h>
+
+#include "scm.h"
+
+unsigned int unix_tot_inflight;
+EXPORT_SYMBOL(unix_tot_inflight);
+
+LIST_HEAD(gc_inflight_list);
+EXPORT_SYMBOL(gc_inflight_list);
+
+DEFINE_SPINLOCK(unix_gc_lock);
+EXPORT_SYMBOL(unix_gc_lock);
+
+struct sock *unix_get_socket(struct file *filp)
+{
+ struct sock *u_sock = NULL;
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ struct sock *s = sock->sk;
+
+ /* PF_UNIX ? */
+ if (s && sock->ops && sock->ops->family == PF_UNIX)
+ u_sock = s;
+ } else {
+ /* Could be an io_uring instance */
+ u_sock = io_uring_get_socket(filp);
+ }
+ return u_sock;
+}
+EXPORT_SYMBOL(unix_get_socket);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ if (atomic_long_inc_return(&u->inflight) == 1) {
+ BUG_ON(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ BUG_ON(list_empty(&u->link));
+ }
+ unix_tot_inflight++;
+ }
+ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(list_empty(&u->link));
+
+ if (atomic_long_dec_and_test(&u->inflight))
+ list_del_init(&u->link);
+ unix_tot_inflight--;
+ }
+ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
+}
+
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+ return 0;
+}
+EXPORT_SYMBOL(unix_attach_fds);
+
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count-1; i >= 0; i--)
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+EXPORT_SYMBOL(unix_detach_fds);
+
+void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
+}
+EXPORT_SYMBOL(unix_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
new file mode 100644
index 000000000000..5a255a477f16
--- /dev/null
+++ b/net/unix/scm.h
@@ -0,0 +1,10 @@
+#ifndef NET_UNIX_SCM_H
+#define NET_UNIX_SCM_H
+
+extern struct list_head gc_inflight_list;
+extern spinlock_t unix_gc_lock;
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+
+#endif
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3ae3a33da70b..602715fc9a75 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -662,6 +662,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
*/
static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
{
+ const struct virtio_transport *t;
+ struct virtio_vsock_pkt *reply;
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
.type = le16_to_cpu(pkt->hdr.type),
@@ -672,15 +674,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
return 0;
- pkt = virtio_transport_alloc_pkt(&info, 0,
- le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port),
- le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
- if (!pkt)
+ reply = virtio_transport_alloc_pkt(&info, 0,
+ le64_to_cpu(pkt->hdr.dst_cid),
+ le32_to_cpu(pkt->hdr.dst_port),
+ le64_to_cpu(pkt->hdr.src_cid),
+ le32_to_cpu(pkt->hdr.src_port));
+ if (!reply)
return -ENOMEM;
- return virtio_transport_get_ops()->send_pkt(pkt);
+ t = virtio_transport_get_ops();
+ if (!t) {
+ virtio_transport_free_pkt(reply);
+ return -ENOTCONN;
+ }
+
+ return t->send_pkt(reply);
}
static void virtio_transport_wait_close(struct sock *sk, long timeout)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index eff31348e20b..20a511398389 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -820,8 +820,13 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc)
+ if (rc) {
+ read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
+ x25->neighbour = NULL;
+ read_unlock_bh(&x25_list_lock);
+ x25->state = X25_STATE_0;
+ }
out_put_route:
x25_route_put(rt);
out:
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 77520eacee8f..989e52386c35 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
static void xdp_umem_release(struct xdp_umem *umem)
{
- struct task_struct *task;
- struct mm_struct *mm;
-
xdp_umem_clear_dev(umem);
ida_simple_remove(&umem_ida, umem->id);
@@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
xdp_umem_unpin_pages(umem);
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
-
- mmput(mm);
kfree(umem->pages);
umem->pages = NULL;
xdp_umem_unaccount_pages(umem);
-out:
kfree(umem);
}
@@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (size_chk < 0)
return -EINVAL;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
umem->address = (unsigned long)addr;
umem->chunk_mask = ~((u64)chunk_size - 1);
umem->size = size;
@@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
err = xdp_umem_account_pages(umem);
if (err)
- goto out;
+ return err;
err = xdp_umem_pin_pages(umem);
if (err)
@@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
out_account:
xdp_umem_unaccount_pages(umem);
-out:
- put_pid(umem->pid);
return err;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6697084e3fdf..a14e8864e4fa 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -407,6 +407,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
if (sxdp->sxdp_family != AF_XDP)
return -EINVAL;
+ flags = sxdp->sxdp_flags;
+ if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+ return -EINVAL;
+
mutex_lock(&xs->mutex);
if (xs->dev) {
err = -EBUSY;
@@ -425,7 +429,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
qid = sxdp->sxdp_queue_id;
- flags = sxdp->sxdp_flags;
if (flags & XDP_SHARED_UMEM) {
struct xdp_sock *umem_xs;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 661d007c3b28..d5e06c8e0cbf 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
if (!err && umem->fq)
- err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+ err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb);
if (!err && umem->cq) {
- err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+ err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING,
nlskb);
}
return err;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bcb5cbb40419..610c0bdc0c2b 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -174,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
if (!xskq_is_valid_addr(q, d->addr))
return false;
- if (((d->addr + d->len) & q->chunk_mask) !=
- (d->addr & q->chunk_mask)) {
+ if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
+ d->options) {
q->invalid_descs++;
return false;
}