summaryrefslogtreecommitdiffstats
path: root/net/ipv4/ipmr.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r--net/ipv4/ipmr.c268
1 files changed, 145 insertions, 123 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index efc1e76d4977..c0317c940bcd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -299,10 +299,29 @@ static void __net_exit ipmr_rules_exit(struct net *net)
}
#endif
+static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc_cache_cmp_arg *cmparg = arg->key;
+ struct mfc_cache *c = (struct mfc_cache *)ptr;
+
+ return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
+ cmparg->mfc_origin != c->mfc_origin;
+}
+
+static const struct rhashtable_params ipmr_rht_params = {
+ .head_offset = offsetof(struct mfc_cache, mnode),
+ .key_offset = offsetof(struct mfc_cache, cmparg),
+ .key_len = sizeof(struct mfc_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ipmr_hash_cmp,
+ .automatic_shrinking = true,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
- unsigned int i;
/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (id != RT_TABLE_DEFAULT && id >= 1000000000)
@@ -318,10 +337,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
write_pnet(&mrt->net, net);
mrt->id = id;
- /* Forwarding cache */
- for (i = 0; i < MFC_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
-
+ rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
INIT_LIST_HEAD(&mrt->mfc_unres_queue);
setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
@@ -338,6 +355,7 @@ static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
kfree(mrt);
}
@@ -839,13 +857,17 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
__be32 origin,
__be32 mcastgrp)
{
- int line = MFC_HASH(mcastgrp, origin);
+ struct mfc_cache_cmp_arg arg = {
+ .mfc_mcastgrp = mcastgrp,
+ .mfc_origin = origin
+ };
+ struct rhlist_head *tmp, *list;
struct mfc_cache *c;
- list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
- if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
- return c;
- }
+ list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ return c;
+
return NULL;
}
@@ -853,13 +875,16 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
int vifi)
{
- int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY));
+ struct mfc_cache_cmp_arg arg = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY)
+ };
+ struct rhlist_head *tmp, *list;
struct mfc_cache *c;
- list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
- if (c->mfc_origin == htonl(INADDR_ANY) &&
- c->mfc_mcastgrp == htonl(INADDR_ANY) &&
- c->mfc_un.res.ttls[vifi] < 255)
+ list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
return c;
return NULL;
@@ -869,29 +894,51 @@ static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
__be32 mcastgrp, int vifi)
{
- int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY));
+ struct mfc_cache_cmp_arg arg = {
+ .mfc_mcastgrp = mcastgrp,
+ .mfc_origin = htonl(INADDR_ANY)
+ };
+ struct rhlist_head *tmp, *list;
struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
goto skip;
- list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
- if (c->mfc_origin == htonl(INADDR_ANY) &&
- c->mfc_mcastgrp == mcastgrp) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt,
- c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
+ list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
skip:
return ipmr_cache_find_any_parent(mrt, vifi);
}
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
+ __be32 origin, __be32 mcastgrp,
+ int parent)
+{
+ struct mfc_cache_cmp_arg arg = {
+ .mfc_mcastgrp = mcastgrp,
+ .mfc_origin = origin,
+ };
+ struct rhlist_head *tmp, *list;
+ struct mfc_cache *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+
/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
@@ -1028,10 +1075,10 @@ static int ipmr_cache_report(struct mr_table *mrt,
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
struct sk_buff *skb)
{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct mfc_cache *c;
bool found = false;
int err;
- struct mfc_cache *c;
- const struct iphdr *iph = ip_hdr(skb);
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
@@ -1095,46 +1142,39 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
- int line;
- struct mfc_cache *c, *next;
+ struct mfc_cache *c;
- line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
+ mfc->mfcc_mcastgrp.s_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
+ list_del_rcu(&c->list);
+ mroute_netlink_event(mrt, c, RTM_DELROUTE);
+ ipmr_cache_free(c);
- list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
- if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
- c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
- (parent == -1 || parent == c->mfc_parent)) {
- list_del_rcu(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ return 0;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
struct mfc_cache *uc, *c;
+ bool found;
+ int ret;
if (mfc->mfcc_parent >= MAXVIFS)
return -ENFILE;
- line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
-
- list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
- if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
- c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
- (parent == -1 || parent == c->mfc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
+ mfc->mfcc_mcastgrp.s_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
c->mfc_parent = mfc->mfcc_parent;
ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
@@ -1160,8 +1200,14 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
- list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
-
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ipmr_rht_params);
+ if (ret) {
+ pr_err("ipmr: rhtable insert error %d\n", ret);
+ ipmr_cache_free(c);
+ return ret;
+ }
+ list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
@@ -1191,9 +1237,9 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mfc_cache *c, *tmp;
LIST_HEAD(list);
- struct mfc_cache *c, *next;
+ int i;
/* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
@@ -1204,19 +1250,18 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
unregister_netdevice_many(&list);
/* Wipe the cache */
- for (i = 0; i < MFC_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- list_del_rcu(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
- }
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
+ list_del_rcu(&c->list);
+ mroute_netlink_event(mrt, c, RTM_DELROUTE);
+ ipmr_cache_free(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
ipmr_destroy_unres(mrt, c);
@@ -1791,9 +1836,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *cache,
int local)
{
+ int true_vifi = ipmr_find_vif(mrt, skb->dev);
int psend = -1;
int vif, ct;
- int true_vifi = ipmr_find_vif(mrt, skb->dev);
vif = cache->mfc_parent;
cache->mfc_un.res.pkt++;
@@ -2091,8 +2136,10 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS)
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
return -ENOENT;
+ }
if (VIF_EXISTS(mrt, c->mfc_parent) &&
nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
@@ -2134,7 +2181,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
- struct rtmsg *rtm, int nowait, u32 portid)
+ struct rtmsg *rtm, u32 portid)
{
struct mfc_cache *cache;
struct mr_table *mrt;
@@ -2158,11 +2205,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
struct net_device *dev;
int vif = -1;
- if (nowait) {
- rcu_read_unlock();
- return -EAGAIN;
- }
-
dev = skb->dev;
read_lock(&mrt_lock);
if (dev)
@@ -2296,34 +2338,30 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
struct mr_table *mrt;
struct mfc_cache *mfc;
unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
unsigned int e = 0, s_e;
s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
+ s_e = cb->args[1];
rcu_read_lock();
ipmr_for_each_table(mrt, net) {
if (t < s_t)
goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC_LINES; h++) {
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (ipmr_fill_mroute(mrt, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
+ goto done;
next_entry:
- e++;
- }
- e = s_e = 0;
+ e++;
}
+ e = 0;
+ s_e = 0;
+
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
if (e < s_e)
@@ -2340,16 +2378,15 @@ next_entry2:
e++;
}
spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
+ e = 0;
+ s_e = 0;
next_table:
t++;
}
done:
rcu_read_unlock();
- cb->args[2] = e;
- cb->args[1] = h;
+ cb->args[1] = e;
cb->args[0] = t;
return skb->len;
@@ -2559,7 +2596,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
- "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
+ "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
@@ -2593,10 +2630,8 @@ struct ipmr_mfc_iter {
struct seq_net_private p;
struct mr_table *mrt;
struct list_head *cache;
- int ct;
};
-
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
struct ipmr_mfc_iter *it, loff_t pos)
{
@@ -2604,12 +2639,10 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
struct mfc_cache *mfc;
rcu_read_lock();
- for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
- it->cache = &mrt->mfc_cache_array[it->ct];
- list_for_each_entry_rcu(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
rcu_read_unlock();
spin_lock_bh(&mfc_unres_lock);
@@ -2636,17 +2669,16 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
it->mrt = mrt;
it->cache = NULL;
- it->ct = 0;
return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct mfc_cache *mfc = v;
struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt = it->mrt;
+ struct mfc_cache *mfc = v;
++*pos;
@@ -2659,19 +2691,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (it->cache == &mrt->mfc_unres_queue)
goto end_of_list;
- BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
-
- while (++it->ct < MFC_LINES) {
- it->cache = &mrt->mfc_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc_cache, list);
- }
-
/* exhausted cache_array, show unresolved */
rcu_read_unlock();
it->cache = &mrt->mfc_unres_queue;
- it->ct = 0;
spin_lock_bh(&mfc_unres_lock);
if (!list_empty(it->cache))
@@ -2691,7 +2713,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
if (it->cache == &mrt->mfc_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_array[it->ct])
+ else if (it->cache == &mrt->mfc_cache_list)
rcu_read_unlock();
}