summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx4/main.c
diff options
context:
space:
mode:
authorYishai Hadas2015-01-25 15:59:42 +0100
committerDavid S. Miller2015-01-25 23:43:14 +0100
commit55ad359225b2232b9b8f04a0dfa169bd3a7d86d2 (patch)
tree6fac830a70a14c36103ff7fc15af01a50d8926b3 /drivers/net/ethernet/mellanox/mlx4/main.c
parentnet/mlx4_core: Handle AER flow properly (diff)
downloadkernel-qcow2-linux-55ad359225b2232b9b8f04a0dfa169bd3a7d86d2.tar.gz
kernel-qcow2-linux-55ad359225b2232b9b8f04a0dfa169bd3a7d86d2.tar.xz
kernel-qcow2-linux-55ad359225b2232b9b8f04a0dfa169bd3a7d86d2.zip
net/mlx4_core: Enable device recovery flow with SRIOV
In SRIOV, both the PF and the VF may attempt device recovery whenever they assume that the device is not functioning. When the PF driver resets the device, the VF should detect this and attempt to reinitialize itself. The VF must be able to reset itself under all circumstances, even if the PF is not responsive. The VF shall reset itself in the following cases: 1. Commands are not processed within reasonable time over the communication channel. This is done considering device state and the correct return code based on the command as was done in the native mode, done in the next patch. 2. The VF driver receives an internal error event reported by the PF on the communication channel. This occurs when the PF driver resets the device or when VF is out of sync with the PF. Add 'VF reset' capability, which allows the VF to reinitialize itself even when the PF is not responsive. As PF and VF may run their reset flow simulantanisly, there are several cases that are handled: - Prevent freeing VF resources upon FLR, when PF is in its unloading stage. - Prevent PF getting VF commands before it has finished initializing its resources. - Upon VF startup, check that comm-channel is online before sending commands to the PF and getting timed-out. Signed-off-by: Yishai Hadas <yishaih@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/main.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c135
1 files changed, 108 insertions, 27 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 6bb0fca137cd..1baf1f1e2866 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe,
MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
MLX4_FUNC_CAP_DMFS_A0_STATIC)
+#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV)
+
static char mlx4_version[] =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -1579,6 +1581,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev)
}
}
+static int mlx4_comm_check_offline(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_OFFLINE_OFFSET 0x09
+
+ u32 comm_flags;
+ u32 offline_bit;
+ unsigned long end;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
+ while (time_before(jiffies, end)) {
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ offline_bit = (comm_flags &
+ (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
+ if (!offline_bit)
+ return 0;
+ /* There are cases as part of AER/Reset flow that PF needs
+ * around 100 msec to load. We therefore sleep for 100 msec
+ * to allow other tasks to make use of that CPU during this
+ * time interval.
+ */
+ msleep(100);
+ }
+ mlx4_err(dev, "Communication channel is offline.\n");
+ return -EIO;
+}
+
+static void mlx4_reset_vf_support(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_RST_OFFSET 0x1e
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 comm_rst;
+ u32 comm_caps;
+
+ comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_CAPS));
+ comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
+
+ if (comm_rst)
+ dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
+}
+
static int mlx4_init_slave(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1594,6 +1640,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
mutex_lock(&priv->cmd.slave_cmd_mutex);
priv->cmd.max_cmds = 1;
+ if (mlx4_comm_check_offline(dev)) {
+ mlx4_err(dev, "PF is not responsive, skipping initialization\n");
+ goto err_offline;
+ }
+
+ mlx4_reset_vf_support(dev);
mlx4_warn(dev, "Sending reset\n");
ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
MLX4_COMM_TIME);
@@ -1637,6 +1689,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
err:
mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+err_offline:
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return -EIO;
}
@@ -2494,11 +2547,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
!!((flags) & MLX4_FLAG_MASTER))
static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
- u8 total_vfs, int existing_vfs)
+ u8 total_vfs, int existing_vfs, int reset_flow)
{
u64 dev_flags = dev->flags;
int err = 0;
+ if (reset_flow) {
+ dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
+ GFP_KERNEL);
+ if (!dev->dev_vfs)
+ goto free_mem;
+ return dev_flags;
+ }
+
atomic_inc(&pf_loading);
if (dev->flags & MLX4_FLAG_SRIOV) {
if (existing_vfs != total_vfs) {
@@ -2533,6 +2594,7 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
disable_sriov:
atomic_dec(&pf_loading);
+free_mem:
dev->persist->num_vfs = 0;
kfree(dev->dev_vfs);
return dev_flags & ~MLX4_FLAG_MASTER;
@@ -2557,7 +2619,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap
}
static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
- int total_vfs, int *nvfs, struct mlx4_priv *priv)
+ int total_vfs, int *nvfs, struct mlx4_priv *priv,
+ int reset_flow)
{
struct mlx4_dev *dev;
unsigned sum = 0;
@@ -2679,8 +2742,10 @@ slave_start:
goto err_fw;
if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
- u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
- existing_vfs);
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev,
+ total_vfs,
+ existing_vfs,
+ reset_flow);
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
dev->flags = dev_flags;
@@ -2722,7 +2787,7 @@ slave_start:
if (dev->flags & MLX4_FLAG_SRIOV) {
if (!existing_vfs)
pci_disable_sriov(pdev);
- if (mlx4_is_master(dev))
+ if (mlx4_is_master(dev) && !reset_flow)
atomic_dec(&pf_loading);
dev->flags &= ~MLX4_FLAG_SRIOV;
}
@@ -2736,7 +2801,8 @@ slave_start:
}
if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
- u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+ existing_vfs, reset_flow);
if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
@@ -2848,6 +2914,17 @@ slave_start:
goto err_steer;
mlx4_init_quotas(dev);
+ /* When PF resources are ready arm its comm channel to enable
+ * getting commands
+ */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_ARM_COMM_CHANNEL(dev);
+ if (err) {
+ mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+ err);
+ goto err_steer;
+ }
+ }
for (port = 1; port <= dev->caps.num_ports; port++) {
err = mlx4_init_port_info(dev, port);
@@ -2866,7 +2943,7 @@ slave_start:
priv->removed = 0;
- if (mlx4_is_master(dev) && dev->persist->num_vfs)
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
atomic_dec(&pf_loading);
kfree(dev_cap);
@@ -2925,10 +3002,12 @@ err_cmd:
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
err_sriov:
- if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
+ if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
pci_disable_sriov(pdev);
+ dev->flags &= ~MLX4_FLAG_SRIOV;
+ }
- if (mlx4_is_master(dev) && dev->persist->num_vfs)
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
atomic_dec(&pf_loading);
kfree(priv->dev.dev_vfs);
@@ -3073,7 +3152,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
if (err)
goto err_release_regions;
- err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
if (err)
goto err_catas;
@@ -3131,9 +3210,11 @@ static void mlx4_clean_dev(struct mlx4_dev *dev)
{
struct mlx4_dev_persistent *persist = dev->persist;
struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
memset(priv, 0, sizeof(*priv));
priv->dev.persist = persist;
+ priv->dev.flags = flags;
}
static void mlx4_unload_one(struct pci_dev *pdev)
@@ -3143,7 +3224,6 @@ static void mlx4_unload_one(struct pci_dev *pdev)
struct mlx4_priv *priv = mlx4_priv(dev);
int pci_dev_data;
int p, i;
- int active_vfs = 0;
if (priv->removed)
return;
@@ -3157,14 +3237,6 @@ static void mlx4_unload_one(struct pci_dev *pdev)
pci_dev_data = priv->pci_dev_data;
- /* Disabling SR-IOV is not allowed while there are active vf's */
- if (mlx4_is_master(dev)) {
- active_vfs = mlx4_how_many_lives_vf(dev);
- if (active_vfs) {
- pr_warn("Removing PF when there are active VF's !!\n");
- pr_warn("Will not disable SR-IOV.\n");
- }
- }
mlx4_stop_sense(dev);
mlx4_unregister_device(dev);
@@ -3208,12 +3280,6 @@ static void mlx4_unload_one(struct pci_dev *pdev)
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
- if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
- mlx4_warn(dev, "Disabling SR-IOV\n");
- pci_disable_sriov(pdev);
- dev->flags &= ~MLX4_FLAG_SRIOV;
- dev->persist->num_vfs = 0;
- }
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
@@ -3235,11 +3301,21 @@ static void mlx4_remove_one(struct pci_dev *pdev)
struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
struct mlx4_dev *dev = persist->dev;
struct mlx4_priv *priv = mlx4_priv(dev);
+ int active_vfs = 0;
mutex_lock(&persist->interface_state_mutex);
persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
mutex_unlock(&persist->interface_state_mutex);
+ /* Disabling SR-IOV is not allowed while there are active vf's */
+ if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
+ active_vfs = mlx4_how_many_lives_vf(dev);
+ if (active_vfs) {
+ pr_warn("Removing PF when there are active VF's !!\n");
+ pr_warn("Will not disable SR-IOV.\n");
+ }
+ }
+
/* device marked to be under deletion running now without the lock
* letting other tasks to be terminated
*/
@@ -3248,6 +3324,11 @@ static void mlx4_remove_one(struct pci_dev *pdev)
else
mlx4_info(dev, "%s: interface is down\n", __func__);
mlx4_catas_end(dev);
+ if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
+
pci_release_regions(pdev);
pci_disable_device(pdev);
kfree(dev->persist);
@@ -3287,7 +3368,7 @@ int mlx4_restart_one(struct pci_dev *pdev)
memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
mlx4_unload_one(pdev);
- err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
if (err) {
mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
__func__, pci_name(pdev), err);
@@ -3397,7 +3478,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
mutex_lock(&persist->interface_state_mutex);
if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
- priv);
+ priv, 1);
if (ret) {
mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
__func__, ret);