summaryrefslogtreecommitdiffstats
path: root/net/core/rtnetlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/rtnetlink.c')
-rw-r--r--net/core/rtnetlink.c892
1 files changed, 616 insertions, 276 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ace48926b19..204297dffd2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,9 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ struct module *owner;
unsigned int flags;
+ struct rcu_head rcu;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
-static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-/**
- * __rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
+static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
+{
+ struct rtnl_link **tab;
+
+ if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
+ protocol = PF_UNSPEC;
+
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
+ if (!tab)
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
+
+ return tab[msgtype];
+}
+
+static int rtnl_register_internal(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
{
- struct rtnl_link *tab;
+ struct rtnl_link *link, *old;
+ struct rtnl_link __rcu **tab;
int msgindex;
+ int ret = -ENOBUFS;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
+ rtnl_lock();
+ tab = rtnl_msg_handlers[protocol];
if (tab == NULL) {
- tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
- if (tab == NULL)
- return -ENOBUFS;
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
+ if (!tab)
+ goto unlock;
+ /* ensures we see the 0 stores */
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
+ old = rtnl_dereference(tab[msgindex]);
+ if (old) {
+ link = kmemdup(old, sizeof(*old), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ } else {
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ }
+
+ WARN_ON(link->owner && link->owner != owner);
+ link->owner = owner;
+
+ WARN_ON(doit && link->doit && link->doit != doit);
if (doit)
- tab[msgindex].doit = doit;
+ link->doit = doit;
+ WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
if (dumpit)
- tab[msgindex].dumpit = dumpit;
- tab[msgindex].flags |= flags;
+ link->dumpit = dumpit;
- return 0;
+ link->flags |= flags;
+
+ /* publish protocol:msgtype */
+ rcu_assign_pointer(tab[msgindex], link);
+ ret = 0;
+ if (old)
+ kfree_rcu(old, rcu);
+unlock:
+ rtnl_unlock();
+ return ret;
+}
+
+/**
+ * rtnl_register_module - Register a rtnetlink message type
+ *
+ * @owner: module registering the hook (THIS_MODULE)
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
+ *
+ * Like rtnl_register, but for use by removable modules.
+ */
+int rtnl_register_module(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
+{
+ return rtnl_register_internal(owner, protocol, msgtype,
+ doit, dumpit, flags);
}
-EXPORT_SYMBOL_GPL(__rtnl_register);
+EXPORT_SYMBOL_GPL(rtnl_register_module);
/**
* rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
- * Identical to __rtnl_register() but panics on failure. This is useful
- * as failure of this function is very unlikely, it can only happen due
- * to lack of memory when allocating the chain to store all message
- * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continuing.
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
- panic("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n",
- protocol, msgtype);
+ int err;
+
+ err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
+ flags);
+ if (err)
+ pr_err("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n", protocol, msgtype);
}
-EXPORT_SYMBOL_GPL(rtnl_register);
/**
* rtnl_unregister - Unregister a rtnetlink message type
@@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
- if (!handlers) {
+ tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!tab) {
rtnl_unlock();
return -ENOENT;
}
- handlers[msgindex].doit = NULL;
- handlers[msgindex].dumpit = NULL;
- handlers[msgindex].flags = 0;
+ link = tab[msgindex];
+ rcu_assign_pointer(tab[msgindex], NULL); /* unpublish before freeing */
rtnl_unlock();
+ if (link) /* slot is NULL if this msgtype was never registered */
+ kfree_rcu(link, rcu);
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_unregister);
@@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
+ int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rtnl_dereference(rtnl_msg_handlers[protocol]); /* may be NULL */
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ for (msgindex = 0; tab && msgindex < RTM_NR_MSGTYPES; msgindex++) {
+ link = tab[msgindex];
+ if (!link)
+ continue;
+ /* clear the slot, then free the entry after a grace period */
+ rcu_assign_pointer(tab[msgindex], NULL);
+ kfree_rcu(link, rcu);
+ }
rtnl_unlock();
synchronize_net();
- while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
- schedule();
- kfree(handlers);
+ kfree(tab);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
@@ -453,7 +517,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry(ops, &rtnl_af_ops, list) {
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -470,32 +534,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
- list_add_tail(&ops->list, &rtnl_af_ops);
+ list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del_rcu(&ops->list);
rtnl_unlock();
+
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -508,13 +562,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
+ rcu_read_unlock();
return size;
}
@@ -522,11 +578,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
+ bool ret = false;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (master_dev && master_dev->rtnl_link_ops)
- return true;
- return false;
+ ret = true;
+ rcu_read_unlock();
+ return ret;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -844,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
@@ -923,8 +987,13 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ + nla_total_size(4) /* IFLA_NEW_IFINDEX */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + nla_total_size(4) /* IFLA_IF_NETNSID */
+ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ + 0;
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1196,7 +1265,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
nla_nest_cancel(skb, vfstats);
goto nla_put_vf_failure;
}
@@ -1211,6 +1284,36 @@ nla_put_vfinfo_failure:
return -EMSGSIZE;
}
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct nlattr *vfinfo;
+ int i, num_vfs;
+
+ if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+ return 0;
+
+ num_vfs = dev_num_vf(dev->dev.parent);
+ if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+ return -EMSGSIZE;
+
+ if (!dev->netdev_ops->ndo_get_vf_config)
+ return 0;
+
+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+ if (!vfinfo)
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, vfinfo);
+ return 0;
+}
+
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
@@ -1233,6 +1336,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
+ struct netdev_bpf xdp;
ASSERT_RTNL();
@@ -1242,10 +1346,13 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
- if (!ops->ndo_xdp)
+ if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+ __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
+ *prog_id = xdp.prog_id;
+
+ return xdp.prog_attached;
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1307,16 +1414,109 @@ static u32 rtnl_get_event(unsigned long event)
return rtnl_event_type;
}
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct net_device *upper_dev;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ upper_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (upper_dev)
+ ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+ int ifindex = dev_get_iflink(dev);
+
+ if (dev->ifindex == ifindex)
+ return 0;
+
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ char buf[IFALIASZ];
+ int ret;
+
+ ret = dev_get_alias(dev, buf, sizeof(buf));
+ return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev,
+ struct net *src_net)
+{
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id_alloc(src_net, link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ const struct rtnl_af_ops *af_ops;
+ struct nlattr *af_spec;
+
+ af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af_spec)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+ struct nlattr *af;
+ int err;
+
+ if (!af_ops->fill_link_af)
+ continue;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ return -EMSGSIZE;
+
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, af);
+ }
+
+ nla_nest_end(skb, af_spec);
+ return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid, int new_ifindex,
+ int tgt_netnsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct nlattr *af_spec;
- struct rtnl_af_ops *af_ops;
- struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1331,6 +1531,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+ goto nla_put_failure;
+
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1345,18 +1548,20 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
- (upper_dev &&
- nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+ nla_put_iflink(skb, dev) ||
+ put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
- (dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ atomic_read(&dev->carrier_up_count) +
+ atomic_read(&dev->carrier_down_count)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
+ nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
+ atomic_read(&dev->carrier_up_count)) ||
+ nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
+ atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
if (event != IFLA_EVENT_NONE) {
@@ -1385,27 +1590,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
- nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+ if (rtnl_fill_vf(skb, dev, ext_filter_mask))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
- ext_filter_mask & RTEXT_FILTER_VF) {
- int i;
- struct nlattr *vfinfo;
- int num_vfs = dev_num_vf(dev->dev.parent);
-
- vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
- if (!vfinfo)
- goto nla_put_failure;
- for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, vfinfo);
- }
-
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
@@ -1417,51 +1604,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (dev->rtnl_link_ops &&
- dev->rtnl_link_ops->get_link_net) {
- struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
- if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
-
- if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
- goto nla_put_failure;
- }
- }
-
- if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ if (rtnl_fill_link_netnsid(skb, dev, src_net))
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
- if (af_ops->fill_link_af) {
- struct nlattr *af;
- int err;
-
- if (!(af = nla_nest_start(skb, af_ops->family)))
- goto nla_put_failure;
-
- err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
- /*
- * Caller may return ENODATA to indicate that there
- * was no data to be dumped. This is not an error, it
- * means we should trim the attribute header and
- * continue.
- */
- if (err == -ENODATA)
- nla_nest_cancel(skb, af);
- else if (err < 0)
- goto nla_put_failure;
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+ goto nla_put_failure;
+ if (new_ifindex &&
+ nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, af);
- }
- }
- nla_nest_end(skb, af_spec);
+ rcu_read_lock();
+ if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1495,6 +1658,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -1503,6 +1668,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
+ [IFLA_IF_NETNSID] = { .type = NLA_S32 },
+ [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
+ [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1606,9 +1774,28 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
+static struct net *get_target_net(struct sock *sk, int netnsid)
+{
+ struct net *net;
+
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ /* For now, the caller is required to have CAP_NET_ADMIN in
+ * the user namespace owning the target net ns.
+ */
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EACCES);
+ }
+ return net;
+}
+
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx = 0, s_idx;
struct net_device *dev;
@@ -1618,6 +1805,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
const struct rtnl_link_ops *kind_ops = NULL;
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
+ int netnsid = -1;
int err;
int hdrlen;
@@ -1636,6 +1824,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
ifla_policy, NULL) >= 0) {
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb->sk, netnsid);
+ if (IS_ERR(tgt_net)) {
+ tgt_net = net;
+ netnsid = -1;
+ }
+ }
+
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1651,17 +1848,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
goto cont;
if (idx < s_idx)
goto cont;
- err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ err = rtnl_fill_ifinfo(skb, dev, net,
+ RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL, 0,
+ netnsid);
if (err < 0) {
if (likely(skb->len))
@@ -1680,6 +1879,8 @@ out_err:
cb->args[0] = h;
cb->seq = net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -1707,6 +1908,49 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
}
EXPORT_SYMBOL(rtnl_link_get_net);
+/* Figure out which network namespace we are talking about by
+ * examining the link attributes in the following order:
+ *
+ * 1. IFLA_NET_NS_PID
+ * 2. IFLA_NET_NS_FD
+ * 3. IFLA_IF_NETNSID
+ */
+static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
+ struct nlattr *tb[])
+{
+ struct net *net;
+
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])
+ return rtnl_link_get_net(src_net, tb);
+
+ if (!tb[IFLA_IF_NETNSID])
+ return get_net(src_net); /* no namespace attribute: stay in src_net */
+
+ net = get_net_ns_by_id(src_net, nla_get_s32(tb[IFLA_IF_NETNSID])); /* NLA_S32 in ifla_policy */
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ return net;
+}
+
+static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb,
+ struct net *src_net,
+ struct nlattr *tb[], int cap)
+{
+ struct net *net;
+
+ net = rtnl_link_get_net_by_nlattr(src_net, tb);
+ if (IS_ERR(net))
+ return net;
+
+ if (!netlink_ns_capable(skb, net->user_ns, cap)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+
+ return net;
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
{
if (dev) {
@@ -1726,17 +1970,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ rcu_read_lock();
+ af_ops = rtnl_af_lookup(nla_type(af));
+ if (!af_ops) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
- if (!af_ops->set_link_af)
+ if (!af_ops->set_link_af) {
+ rcu_read_unlock();
return -EOPNOTSUPP;
+ }
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
}
+
+ rcu_read_unlock();
}
}
@@ -1912,7 +2166,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1937,7 +2192,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -1958,17 +2213,14 @@ static int do_setlink(const struct sk_buff *skb,
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
- struct net *net = rtnl_link_get_net(dev_net(dev), tb);
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) {
+ struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
+ tb, CAP_NET_ADMIN);
if (IS_ERR(net)) {
err = PTR_ERR(net);
goto errout;
}
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- err = -EPERM;
- goto errout;
- }
+
err = dev_change_net_namespace(dev, net, ifname);
put_net(net);
if (err)
@@ -2070,7 +2322,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2085,17 +2337,37 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_TXQLEN]) {
unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
- unsigned int orig_len = dev->tx_queue_len;
-
- if (dev->tx_queue_len ^ value) {
- dev->tx_queue_len = value;
- err = call_netdevice_notifiers(
- NETDEV_CHANGE_TX_QUEUE_LEN, dev);
- err = notifier_to_errno(err);
- if (err) {
- dev->tx_queue_len = orig_len;
- goto errout;
- }
+
+ err = dev_change_tx_queue_len(dev, value);
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_MODIFIED;
+ }
+
+ if (tb[IFLA_GSO_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
+
+ if (max_size > GSO_MAX_SIZE) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_size ^ max_size) {
+ netif_set_gso_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GSO_MAX_SEGS]) {
+ u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
+
+ if (max_segs > GSO_MAX_SEGS) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_segs ^ max_segs) {
+ dev->gso_max_segs = max_segs;
status |= DO_SETLINK_MODIFIED;
}
}
@@ -2193,13 +2465,17 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
- BUG();
+ rcu_read_lock();
+
+ BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
goto errout;
+ }
+ rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -2361,11 +2637,13 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct net_device *dev;
+ struct net *tgt_net = net;
+ struct net_device *dev = NULL;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
+ int netnsid = -1;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
@@ -2374,20 +2652,38 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else if (tb[IFLA_GROUP])
- return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP]));
+ err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
- return -EINVAL;
+ goto out;
- if (!dev)
- return -ENODEV;
+ if (!dev) {
+ if (tb[IFLA_IFNAME] || ifm->ifi_index > 0)
+ err = -ENODEV;
+
+ goto out;
+ }
+
+ err = rtnl_delete_link(dev);
- return rtnl_delete_link(dev);
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
+
+ return err;
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
@@ -2454,6 +2750,10 @@ struct net_device *rtnl_create_link(struct net *net,
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ if (tb[IFLA_GSO_MAX_SIZE])
+ netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
+ if (tb[IFLA_GSO_MAX_SEGS])
+ dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
return dev;
}
@@ -2502,6 +2802,9 @@ replay:
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2579,12 +2882,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2655,14 +2952,10 @@ replay:
name_assign_type = NET_NAME_ENUM;
}
- dest_net = rtnl_link_get_net(net, tb);
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- err = -EPERM;
- if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN))
- goto out;
-
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
@@ -2714,7 +3007,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2740,11 +3034,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
+ int netnsid = -1;
int err;
u32 ext_filter_mask = 0;
@@ -2752,35 +3048,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else
- return -EINVAL;
+ goto out;
+ err = -ENODEV;
if (dev == NULL)
- return -ENODEV;
+ goto out;
+ err = -ENOBUFS;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
if (nskb == NULL)
- return -ENOBUFS;
+ goto out;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ err = rtnl_fill_ifinfo(nskb, dev, net,
+ RTM_NEWLINK, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+ 0, NULL, 0, netnsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -2829,18 +3140,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = 1;
/* Visit every family index from 1 to RTNL_FAMILY_MAX. On the new side,
 * tab is that family's handler table and link is its entry for this
 * message type; families with no table, no entry or no dumpit callback
 * are skipped.
 */
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
+ struct rtnl_link **tab;
int type = cb->nlh->nlmsg_type-RTM_BASE;
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET)
continue;
/* type is used as the index into tab[] below, so out-of-range values
 * are rejected before the table is touched.
 */
- handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
- if (!handlers)
+ if (type < 0 || type >= RTM_NR_MSGTYPES)
continue;
- dumpit = READ_ONCE(handlers[type].dumpit);
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
+ if (!tab)
+ continue;
+
+ link = tab[type];
+ if (!link)
+ continue;
+
+ dumpit = link->dumpit;
if (!dumpit)
continue;
@@ -2859,7 +3178,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
/* Build an ifinfo message skb for dev. The new-side parameters new_nsid
 * and new_ifindex are not interpreted here; they are forwarded unchanged
 * to rtnl_fill_ifinfo() further down.
 */
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid,
+ int new_ifindex)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2870,7 +3190,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
/* The new call passes dev_net(dev) as the namespace argument and -1 as
 * the final argument (other callers visible in this diff pass their
 * netnsid in that position).
 */
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+ type, 0, 0, change, 0, 0, event,
+ new_nsid, new_ifindex, -1);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2893,14 +3215,15 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
/* Build an ifinfo message for dev via rtmsg_ifinfo_build_skb() and hand it
 * to rtmsg_ifinfo_send(). Nothing is built or sent unless dev->reg_state is
 * NETREG_REGISTERED, and nothing is sent if the build returns NULL.
 * new_nsid and new_ifindex (new side) are passed straight to the builder.
 */
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid, int new_ifindex)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid,
+ new_ifindex);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2908,9 +3231,16 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
/* Convenience wrapper around rtmsg_ifinfo_event() that uses
 * rtnl_get_event(0) as the event; on the new side it supplies NULL and 0
 * for the new_nsid/new_ifindex arguments.
 */
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ NULL, 0);
+}
+
/* Added variant of rtmsg_ifinfo(): same call to rtmsg_ifinfo_event() with
 * rtnl_get_event(0), but the caller provides new_nsid and new_ifindex
 * instead of the NULL/0 defaults.
 */
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid, int new_ifindex)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid, new_ifindex);
}
-EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
@@ -3017,21 +3347,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
/* Validate an optional vlan attribute. When vlan_attr is present its
 * payload must be exactly sizeof(u16) and the id must be non-zero and
 * below VLAN_VID_MASK; otherwise -EINVAL is returned. On the new side the
 * reason is reported through extack instead of pr_info(). The rest of the
 * function lies outside this hunk.
 */
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+ struct netlink_ext_ack *extack)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
+ NL_SET_ERR_MSG(extack, "invalid vlan id");
return -EINVAL;
}
}
@@ -3056,24 +3386,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Request validation: ndm_ifindex must be non-zero and resolve to a device
 * in net, and NDA_LLADDR must be present with a length of ETH_ALEN. The
 * new side reports each failure through extack rather than pr_info().
 */
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
/* extack is handed to fdb_vid_parse() so vlan errors are reported the
 * same way; any non-zero result is returned as-is.
 */
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3160,24 +3490,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Same validation sequence as the add path visible earlier in this diff:
 * non-zero ndm_ifindex that resolves to a device in net, and an
 * NDA_LLADDR attribute of length ETH_ALEN. Failures are reported through
 * extack on the new side.
 */
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
/* vlan id parsing shares fdb_vid_parse(), which now also gets extack. */
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3617,7 +3947,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Resolve the target device by ifi_index in net; an unknown index fails
 * with -ENODEV, reported through extack on the new side.
 */
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3692,7 +4022,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Resolve the target device by ifi_index in net; an unknown index fails
 * with -ENODEV. The removed log line named RTM_SETLINK although this is
 * the dellink handler; the extack text no longer names a message type.
 */
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3943,25 +4273,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
/* The rtnl_af_ops walk is moved under rcu_read_lock() with the _rcu list
 * iterator. Because the failure label is outside the locked region,
 * each "goto nla_put_failure" inside the loop now unlocks first.
 */
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
err = af_ops->fill_stats_af(skb, dev);
/* -ENODATA cancels this family's nest and keeps going; any other
 * negative value aborts the whole fill.
 * NOTE(review): nla_nest_end(skb, af) below is still reached after
 * nla_nest_cancel() in the -ENODATA case - confirm that is intended.
 */
- if (err == -ENODATA)
+ if (err == -ENODATA) {
nla_nest_cancel(skb, af);
- else if (err < 0)
+ } else if (err < 0) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
nla_nest_end(skb, af);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, attr);
@@ -4030,7 +4365,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
/* Size accounting walks rtnl_af_ops under rcu_read_lock() on the new
 * side, mirroring the fill path's use of the _rcu list iterator.
 */
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
@@ -4039,6 +4375,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
size += nla_total_size(0);
}
}
+ rcu_read_unlock();
}
return size;
@@ -4155,7 +4492,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
/* New side: handlers are looked up per (family, type) as a single
 * rtnl_link, and the owning module is pinned while a handler runs.
 */
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
+ struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
@@ -4179,79 +4517,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (family >= ARRAY_SIZE(rtnl_msg_handlers))
- family = PF_UNSPEC;
-
rcu_read_lock();
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- if (!handlers) {
- family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- }
-
/* Dump request path. */
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0;
/* Look for a dumpit handler in the requested family and fall back to
 * PF_UNSPEC when the family has no link or no dumpit for this type.
 */
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
- if (!handlers)
- goto err_unlock;
-
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit)
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit)
goto err_unlock;
}
/* owner and dumpit are copied out of link before rcu_read_unlock();
 * link itself is not used again on this path.
 */
-
- refcount_inc(&rtnl_msg_handlers_ref[family]);
+ owner = link->owner;
+ dumpit = link->dumpit;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
+ err = 0;
+ /* need to do this before rcu_read_unlock() */
+ if (!try_module_get(owner))
+ err = -EPROTONOSUPPORT;
+
rcu_read_unlock();
rtnl = net->rtnl;
/* The dump is started only if the module reference was obtained;
 * otherwise err already holds -EPROTONOSUPPORT.
 */
- {
+ if (err == 0) {
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
+ .module = owner,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
+ /* netlink_dump_start() will keep a reference on
+ * module if dump is still in progress.
+ */
+ module_put(owner);
}
- refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
/* Non-dump path: find a doit handler, again falling back to PF_UNSPEC.
 * With no handler in either place the function leaves through
 * out_unlock with err still at its initial -EOPNOTSUPP.
 */
- doit = READ_ONCE(handlers[type].doit);
- if (!doit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->doit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ link = rtnl_get_link(PF_UNSPEC, type);
+ if (!link || !link->doit)
+ goto out_unlock;
}
/* Pin the owning module for the duration of the handler call; every
 * successful path below ends with a matching module_put(owner).
 */
- flags = READ_ONCE(handlers[type].flags);
+ owner = link->owner;
+ if (!try_module_get(owner)) {
+ err = -EPROTONOSUPPORT;
+ goto out_unlock;
+ }
+
+ flags = link->flags;
/* Handlers flagged RTNL_FLAG_DOIT_UNLOCKED are called without taking
 * rtnl_lock(); doit is read from link before rcu_read_unlock().
 */
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
- refcount_inc(&rtnl_msg_handlers_ref[family]);
- doit = READ_ONCE(handlers[type].doit);
+ doit = link->doit;
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
- refcount_dec(&rtnl_msg_handlers_ref[family]);
+ module_put(owner);
return err;
}
/* Locked path: leave the RCU section, take rtnl_lock(), then look the
 * link up again before calling its doit under the lock.
 */
-
rcu_read_unlock();
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[family]);
- if (handlers) {
- doit = READ_ONCE(handlers[type].doit);
- if (doit)
- err = doit(skb, nlh, extack);
- }
+ link = rtnl_get_link(family, type);
+ if (link && link->doit)
+ err = link->doit(skb, nlh, extack);
rtnl_unlock();
+
+ module_put(owner);
+
+ return err;
+
/* Error exit for the doit path while still inside the RCU section. */
+out_unlock:
+ rcu_read_unlock();
return err;
err_unlock:
@@ -4292,9 +4636,10 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
/* Events in this case group produce an RTM_NEWLINK notification for dev;
 * the diff adds NETDEV_CHANGELOWERSTATE to the group and passes NULL/0
 * for the new new_nsid/new_ifindex arguments.
 */
case NETDEV_CHANGEUPPER:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
+ case NETDEV_CHANGELOWERSTATE:
case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL, 0);
break;
default:
break;
@@ -4338,11 +4683,6 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
- refcount_set(&rtnl_msg_handlers_ref[i], 1);
-
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");