From 51c5aba3b672c4285fca052817f34b22dc79dda7 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:04 +0100 Subject: net/smc: recvmsg and splice_read should return 0 after shutdown When a socket was connected and is now shut down for read, return 0 to indicate end of data in recvmsg and splice_read (like TCP) and do not return ENOTCONN. This behavior is required by the socket api. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net/smc/af_smc.c') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..b04a813fc865 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1505,6 +1505,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1840,7 +1845,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) -- cgit v1.2.3-55-g7522 From b03faa1fafc8018295401dc558bdc76362d860a4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2019 15:56:15 +0100 Subject: net/smc: postpone release of clcsock According to RFC7609 (http://www.rfc-editor.org/info/rfc7609) first the SMC-R connection is shut down and then the normal TCP connection FIN processing drives cleanup of the internal TCP connection. 
The unconditional release of the clcsock during active socket closing has to be postponed if the peer has not yet signalled socket closing. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 33 +++++++++++++++++---------------- net/smc/smc_close.c | 7 ++++++- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'net/smc/af_smc.c') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 369870b0ef79..60ccc8f50368 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -145,32 +145,33 @@ static int smc_release(struct socket *sock) rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; - } - - sk->sk_prot->unhash(sk); - - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); } + sk->sk_prot->unhash(sk); + + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + mutex_lock(&smc->clcsock_release_lock); + sock_release(smc->clcsock); + smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); + } + if (!smc->use_fallback) + smc_conn_free(&smc->conn); + } + /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk); sock_put(sk); /* final sock_put */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..0e60dd741698 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -405,8 +405,13 @@ wakeup: if 
(old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) { + sock_release(smc->clcsock); + smc->clcsock = NULL; + } + } } release_sock(sk); sock_put(sk); /* sock_hold done by schedulers of close_work */ -- cgit v1.2.3-55-g7522 From 62c7139f3ed011379fbbef832b4b15e3c10b355f Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 7 Feb 2019 15:56:17 +0100 Subject: net/smc: unlock LGR pending lock earlier for SMC-D If SMC client and server connections are both established at the same time, smc_connect_ism() cannot send a CLC confirm message while smc_listen_work() is waiting for one due to lock contention. This can result in timeouts in smc_clc_wait_msg() and failed SMC connections. In case of SMC-D, the LGR pending lock is not needed while smc_listen_work() is waiting for the CLC confirm message. So, this patch releases the lock earlier for SMC-D to avoid the locking issue. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net/smc/af_smc.c') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 60ccc8f50368..cf49ed05007b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1287,24 +1287,28 @@ static void smc_listen_work(struct work_struct *work) return; } + /* SMC-D does not need this lock any more */ + if (ism_supported) + mutex_unlock(&smc_create_lgr_pending); + /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { - mutex_unlock(&smc_create_lgr_pending); + if (!ism_supported) + mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } /* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { - mutex_unlock(&smc_create_lgr_pending); + rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + mutex_unlock(&smc_create_lgr_pending); + if (rc) return; - } } smc_conn_save_peer_info(new_smc, &cclc); - mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); } -- cgit v1.2.3-55-g7522 From 72a36a8aecb520f8af6529476dae16f7b6a3d87e Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 7 Feb 2019 15:56:18 +0100 Subject: net/smc: use client and server LGR pending locks for SMC-R If SMC client and server connections are both established at the same time, smc_connect_rdma() cannot send a CLC confirm message while smc_listen_work() is waiting for one due to lock contention. This can result in timeouts in smc_clc_wait_msg() and failed SMC connections. In case of SMC-R, there are two types of LGRs (client and server LGRs) which can be protected by separate locks. So, this patch splits the LGR pending lock into two separate locks for client and server to avoid the locking issue for SMC-R. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'net/smc/af_smc.c') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cf49ed05007b..48ea7669161f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -42,8 +42,11 @@ #include "smc_rx.h" #include "smc_close.h" -static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group - * creation +static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group + * creation on server + */ +static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group + * creation on client */ static void smc_tcp_listen_work(struct work_struct *); @@ -477,7 +480,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, { if (local_contact == SMC_FIRST_CONTACT) smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); + if (smc->conn.lgr->is_smcd) + /* there is only one lgr role for SMC-D; use server lock */ + mutex_unlock(&smc_server_lgr_pending); + else + mutex_unlock(&smc_client_lgr_pending); + smc_conn_free(&smc->conn); return reason_code; } @@ -562,7 +570,7 @@ static int smc_connect_rdma(struct smc_sock *smc, struct smc_link *link; int reason_code = 0; - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_client_lgr_pending); local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, ibport, ntoh24(aclc->qpn), &aclc->lcl, NULL, 0); @@ -573,7 +581,8 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. 
error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - return smc_connect_abort(smc, reason_code, 0); + mutex_unlock(&smc_client_lgr_pending); + return reason_code; } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -617,7 +626,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, reason_code, local_contact); } - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -634,11 +643,14 @@ static int smc_connect_ism(struct smc_sock *smc, int local_contact = SMC_FIRST_CONTACT; int rc = 0; - mutex_lock(&smc_create_lgr_pending); + /* there is only one lgr role for SMC-D; use server lock */ + mutex_lock(&smc_server_lgr_pending); local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, NULL, ismdev, aclc->gid); - if (local_contact < 0) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + if (local_contact < 0) { + mutex_unlock(&smc_server_lgr_pending); + return SMC_CLC_DECL_MEM; + } /* Create send and receive buffers */ if (smc_buf_create(smc, true)) @@ -652,7 +664,7 @@ static int smc_connect_ism(struct smc_sock *smc, rc = smc_clc_send_confirm(smc); if (rc) return smc_connect_abort(smc, rc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -1251,7 +1263,7 @@ static void smc_listen_work(struct work_struct *work) return; } - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -1273,7 +1285,7 @@ static void smc_listen_work(struct work_struct *work) &local_contact) || smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, local_contact); return; @@ 
-1282,21 +1294,21 @@ static void smc_listen_work(struct work_struct *work) /* send SMC Accept CLC message */ rc = smc_clc_send_accept(new_smc, local_contact); if (rc) { - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, rc, local_contact); return; } /* SMC-D does not need this lock any more */ if (ism_supported) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { if (!ism_supported) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } @@ -1304,7 +1316,7 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ism_supported) { rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); if (rc) return; } -- cgit v1.2.3-55-g7522 From 64e28b52c7a6616217bee67ba2ad886f478f9737 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:01:02 +0100 Subject: net/smc: add pnet table namespace support This patch adds namespace support to the pnet table code. Each network namespace gets its own pnet table. Infiniband and smcd device pnetids can only be modified in the initial namespace. In other namespaces they can still be used as if they were set by the underlying hardware. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 28 ++++++++++ net/smc/smc_netns.h | 20 ++++++++ net/smc/smc_pnet.c | 145 ++++++++++++++++++++++++++++++++++++---------------- net/smc/smc_pnet.h | 12 +++++ 4 files changed, 162 insertions(+), 43 deletions(-) create mode 100644 net/smc/smc_netns.h (limited to 'net/smc/af_smc.c') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 46fa9f3016cc..77ef53596d18 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -30,6 +30,10 @@ #include <net/smc.h> #include <asm/ioctls.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "smc_netns.h" + #include "smc.h" #include "smc_clc.h" #include "smc_llc.h" @@ -1966,10 +1970,33 @@ static const struct net_proto_family smc_sock_family_ops = { .create = smc_create, }; +unsigned int smc_net_id; + +static __net_init int smc_net_init(struct net *net) +{ + return smc_pnet_net_init(net); +} + +static void __net_exit smc_net_exit(struct net *net) +{ + smc_pnet_net_exit(net); +} + +static struct pernet_operations smc_net_ops = { + .init = smc_net_init, + .exit = smc_net_exit, + .id = &smc_net_id, + .size = sizeof(struct smc_net), +}; + static int __init smc_init(void) { int rc; + rc = register_pernet_subsys(&smc_net_ops); + if (rc) + return rc; + rc = smc_pnet_init(); if (rc) return rc; @@ -2035,6 +2062,7 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); + unregister_pernet_subsys(&smc_net_ops); } module_init(smc_init); diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h new file mode 100644 index 000000000000..e7a8fc4ae02f --- /dev/null +++ b/net/smc/smc_netns.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications + * + * Network namespace definitions. + * + * Copyright IBM Corp.
2018 + */ + +#ifndef SMC_NETNS_H +#define SMC_NETNS_H + +#include "smc_pnet.h" + +extern unsigned int smc_net_id; + +/* per-network namespace private data */ +struct smc_net { + struct smc_pnettable pnettable; +}; +#endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 5497a8b44287..878f5c085444 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -20,6 +20,9 @@ #include <rdma/ib_verbs.h> +#include <net/netns/generic.h> +#include "smc_netns.h" + #include "smc_pnet.h" #include "smc_ib.h" #include "smc_ism.h" @@ -46,19 +49,6 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { static struct genl_family smc_pnet_nl_family; -/** - * struct smc_pnettable - SMC PNET table anchor - * @lock: Lock for list action - * @pnetlist: List of PNETIDs - */ -static struct smc_pnettable { - rwlock_t lock; - struct list_head pnetlist; -} smc_pnettable = { - .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), - .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) -}; - /** * struct smc_user_pnetentry - pnet identifier name entry for/from user * @list: List node. @@ -101,17 +91,23 @@ static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) /* Remove a pnetid from the pnet table.
*/ -static int smc_pnet_remove_by_pnetid(char *pnet_name) +static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) { struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_pnettable *pnettable; struct smc_ib_device *ibdev; struct smcd_dev *smcd_dev; + struct smc_net *sn; int rc = -ENOENT; int ibport; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + /* remove netdevices */ - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, + write_lock(&pnettable->lock); + list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || smc_pnet_match(pnetelem->pnet_name, pnet_name)) { @@ -121,7 +117,12 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) rc = 0; } } - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); + + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return rc; + /* remove ib devices */ spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { @@ -158,11 +159,17 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) static int smc_pnet_remove_by_ndev(struct net_device *ndev) { struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_pnettable *pnettable; + struct net *net = dev_net(ndev); + struct smc_net *sn; int rc = -ENOENT; - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, - list) { + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + + write_lock(&pnettable->lock); + list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->ndev == ndev) { list_del(&pnetelem->list); dev_put(pnetelem->ndev); @@ -171,13 +178,14 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) break; } } - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); return rc; } /* Append a pnetid to the end 
of the pnet table if not already on this list. */ -static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) +static int smc_pnet_enter(struct smc_pnettable *pnettable, + struct smc_user_pnetentry *new_pnetelem) { u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; @@ -233,17 +241,17 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) SMC_MAX_PNETID_LEN); tmp_pnetelem->ndev = new_pnetelem->ndev; - write_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { + write_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->ndev == new_pnetelem->ndev) new_netdev = false; } if (new_netdev) { dev_hold(tmp_pnetelem->ndev); - list_add_tail(&tmp_pnetelem->list, &smc_pnettable.pnetlist); - write_unlock(&smc_pnettable.lock); + list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); + write_unlock(&pnettable->lock); } else { - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); kfree(tmp_pnetelem); } @@ -340,6 +348,10 @@ static int smc_pnet_fill_entry(struct net *net, goto error; } + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return 0; + rc = -EINVAL; if (tb[SMC_PNETID_IBNAME]) { ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); @@ -403,11 +415,17 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct smc_user_pnetentry pnetelem; + struct smc_pnettable *pnettable; + struct smc_net *sn; int rc; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); if (!rc) - rc = smc_pnet_enter(&pnetelem); + rc = smc_pnet_enter(pnettable, &pnetelem); if (pnetelem.ndev) dev_put(pnetelem.ndev); return rc; @@ -415,9 +433,11 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) static int smc_pnet_del(struct sk_buff *skb, struct 
genl_info *info) { + struct net *net = genl_info_net(info); + if (!info->attrs[SMC_PNETID_NAME]) return -EINVAL; - return smc_pnet_remove_by_pnetid( + return smc_pnet_remove_by_pnetid(net, (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } @@ -445,19 +465,25 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb, return 0; } -static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, - int start_idx) +static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u8 *pnetid, int start_idx) { struct smc_user_pnetentry tmp_entry; + struct smc_pnettable *pnettable; struct smc_pnetentry *pnetelem; struct smc_ib_device *ibdev; struct smcd_dev *smcd_dev; + struct smc_net *sn; int idx = 0; int ibport; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + /* dump netdevices */ - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { + read_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; if (idx++ < start_idx) @@ -472,7 +498,11 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, break; } } - read_unlock(&smc_pnettable.lock); + read_unlock(&pnettable->lock); + + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return idx; /* dump ib devices */ spin_lock(&smc_ib_devices.lock); @@ -528,9 +558,10 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int idx; - idx = _smc_pnet_dump(skb, NETLINK_CB(cb->skb).portid, + idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NULL, cb->args[0]); cb->args[0] = idx; @@ -540,6 +571,7 @@ static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) /* Retrieve 
one PNETID entry */ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *msg; void *hdr; @@ -550,7 +582,7 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - _smc_pnet_dump(msg, info->snd_portid, info->snd_seq, + _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, nla_data(info->attrs[SMC_PNETID_NAME]), 0); /* finish multi part message and send it */ @@ -567,7 +599,9 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) */ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { - return smc_pnet_remove_by_pnetid(NULL); + struct net *net = genl_info_net(info); + + return smc_pnet_remove_by_pnetid(net, NULL); } /* SMC_PNETID generic netlink operation definition */ @@ -631,6 +665,18 @@ static struct notifier_block smc_netdev_notifier = { .notifier_call = smc_pnet_netdev_event }; +/* init network namespace */ +int smc_pnet_net_init(struct net *net) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnettable *pnettable = &sn->pnettable; + + INIT_LIST_HEAD(&pnettable->pnetlist); + rwlock_init(&pnettable->lock); + + return 0; +} + int __init smc_pnet_init(void) { int rc; @@ -644,9 +690,15 @@ int __init smc_pnet_init(void) return rc; } +/* exit network namespace */ +void smc_pnet_net_exit(struct net *net) +{ + /* flush pnet table */ + smc_pnet_remove_by_pnetid(net, NULL); +} + void smc_pnet_exit(void) { - smc_pnet_flush(NULL, NULL); unregister_netdevice_notifier(&smc_netdev_notifier); genl_unregister_family(&smc_pnet_nl_family); } @@ -674,22 +726,29 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) return ndev; } -static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *netdev, +static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, u8 *pnetid) { + struct smc_pnettable *pnettable; + struct net *net = dev_net(ndev); struct smc_pnetentry *pnetelem; 
+ struct smc_net *sn; int rc = -ENOENT; - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (netdev == pnetelem->ndev) { + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + + read_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { + if (ndev == pnetelem->ndev) { /* get pnetid of netdev device */ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); rc = 0; break; } } - read_unlock(&smc_pnettable.lock); + read_unlock(&pnettable->lock); return rc; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 37044e4ee50f..5eac42fb45d0 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -19,6 +19,16 @@ struct smc_ib_device; struct smcd_dev; +/** + * struct smc_pnettable - SMC PNET table anchor + * @lock: Lock for list action + * @pnetlist: List of PNETIDs + */ +struct smc_pnettable { + rwlock_t lock; + struct list_head pnetlist; +}; + static inline int smc_pnetid_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) { @@ -30,7 +40,9 @@ static inline int smc_pnetid_by_dev_port(struct device *dev, } int smc_pnet_init(void) __init; +int smc_pnet_net_init(struct net *net); void smc_pnet_exit(void); +void smc_pnet_net_exit(struct net *net); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport, unsigned short vlan_id, u8 gid[]); -- cgit v1.2.3-55-g7522