From 822973ba79fd5a5b711270c2de7196c6b50c6687 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 2 May 2008 17:49:37 -0700 Subject: bonding: Do not call free_netdev for already registered device. If the call to bond_create_sysfs_entry in bond_create fails, the proper rollback is to call unregister_netdevice, not free_netdev. Otherwise - kernel BUG at net/core/dev.c:4057! Checked with artificial failures injected into bond_create_sysfs_entry. Pavel's original patch modified by Jay Vosburgh to move code around for clarity (remove goto-hopping within the unwind block). Signed-off-by: Pavel Emelyanov Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6425603bc379..5509732d3f9d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4936,7 +4936,9 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond if (res < 0) { rtnl_lock(); down_write(&bonding_rwsem); - goto out_bond; + bond_deinit(bond_dev); + unregister_netdevice(bond_dev); + goto out_rtnl; } return 0; -- cgit v1.2.3-55-g7522 From ae68c39819ddf30549652962768a50edae5eec6f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 2 May 2008 17:49:39 -0700 Subject: bonding: Deadlock between bonding_store_bonds and bond_destroy_sysfs. The sysfs layer has an internal protection that ensures that every process sitting inside a ->store/->show callback exits before the corresponding entry is unregistered (the call traces are rather big, but I can provide them if required). On the other hand, bonding takes rtnl_lock in a) bonding_store_bonds, i.e. in the ->store callback, and b) module exit, before calling the sysfs unregister routines. Thus, a classic AB-BA deadlock may occur.
To reproduce run # while :; do modprobe bonding; rmmod bonding; done and # while :; do echo '+bond%d' > /sys/class/net/bonding_masters ; done in parallel. The fix is to move the bond_destroy_sysfs out of the rtnl_lock, but _before_ bond_free_all to make sure no bonding devices exist after module unload. Signed-off-by: Pavel Emelyanov Acked-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5509732d3f9d..e41b3e57260c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4992,9 +4992,10 @@ err: destroy_workqueue(bond->wq); } + bond_destroy_sysfs(); + rtnl_lock(); bond_free_all(); - bond_destroy_sysfs(); rtnl_unlock(); out: return res; @@ -5006,9 +5007,10 @@ static void __exit bonding_exit(void) unregister_netdevice_notifier(&bond_netdev_notifier); unregister_inetaddr_notifier(&bond_inetaddr_notifier); + bond_destroy_sysfs(); + rtnl_lock(); bond_free_all(); - bond_destroy_sysfs(); rtnl_unlock(); } -- cgit v1.2.3-55-g7522 From 569f0c4d909c7f73de634abcdc36344cb72de36a Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 2 May 2008 18:06:02 -0700 Subject: bonding: fix enslavement error unwinds As part of: commit c2edacf80e155ef54ae4774379d461b60896bc2e Author: Jay Vosburgh Date: Mon Jul 9 10:42:47 2007 -0700 bonding / ipv6: no addrconf for slaves separately from master two steps were rearranged in the enslavement process: netdev_set_master is now before the call to dev_open to open the slave. This patch updates the error cases and unwind process at the end of bond_enslave to match the new order. Without this patch, it is possible for the enslavement to fail, but leave the slave with IFF_SLAVE set in its flags. 
Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e41b3e57260c..50a40e433154 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1425,13 +1425,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) res = netdev_set_master(slave_dev, bond_dev); if (res) { dprintk("Error %d calling netdev_set_master\n", res); - goto err_close; + goto err_restore_mac; } /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { dprintk("Openning slave %s failed\n", slave_dev->name); - goto err_restore_mac; + goto err_unset_master; } new_slave->dev = slave_dev; @@ -1444,7 +1444,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) */ res = bond_alb_init_slave(bond, new_slave); if (res) { - goto err_unset_master; + goto err_close; } } @@ -1619,7 +1619,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) res = bond_create_slave_symlinks(bond_dev, slave_dev); if (res) - goto err_unset_master; + goto err_close; printk(KERN_INFO DRV_NAME ": %s: enslaving %s as a%s interface with a%s link.\n", @@ -1631,12 +1631,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ -err_unset_master: - netdev_set_master(slave_dev, NULL); - err_close: dev_close(slave_dev); +err_unset_master: + netdev_set_master(slave_dev, NULL); + err_restore_mac: if (!bond->params.fail_over_mac) { memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); -- cgit v1.2.3-55-g7522