summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/amazon/ena/ena_netdev.c
diff options
context:
space:
mode:
authorNetanel Belgazal2017-12-28 22:31:30 +0100
committerDavid S. Miller2018-01-02 20:35:12 +0100
commit8510e1a3d16c7e4e2b47c9675b18725407c616b7 (patch)
treebb5285244774553c8ea08da5f24a89496d787e73 /drivers/net/ethernet/amazon/ena/ena_netdev.c
parentMerge branch 'tcp-sctp-dccp-Replace-jprobe-usage-with-trace-events' (diff)
downloadkernel-qcow2-linux-8510e1a3d16c7e4e2b47c9675b18725407c616b7.tar.gz
kernel-qcow2-linux-8510e1a3d16c7e4e2b47c9675b18725407c616b7.tar.xz
kernel-qcow2-linux-8510e1a3d16c7e4e2b47c9675b18725407c616b7.zip
net: ena: add detection and recovery mechanism for handling missed/misrouted MSI-X
A mechanism for detection of stuck Rx/Tx rings due to missed or misrouted interrupts. Check if there are unhandled completion descriptors before the first MSI-X interrupt arrived. The check is per queue and per interrupt vector. Once such condition is detected, driver and device reset is scheduled. Signed-off-by: Netanel Belgazal <netanel@amazon.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c')
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c68
1 files changed, 61 insertions, 7 deletions
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 97c5a89a9cf7..a6f283232cb7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -158,6 +158,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
ring->per_napi_packets = 0;
ring->per_napi_bytes = 0;
ring->cpu = 0;
+ ring->first_interrupt = false;
+ ring->no_interrupt_event_cnt = 0;
u64_stats_init(&ring->syncp);
}
@@ -1274,6 +1276,9 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
{
struct ena_napi *ena_napi = data;
+ ena_napi->tx_ring->first_interrupt = true;
+ ena_napi->rx_ring->first_interrupt = true;
+
napi_schedule_irqoff(&ena_napi->napi);
return IRQ_HANDLED;
@@ -2648,8 +2653,32 @@ static void ena_fw_reset_device(struct work_struct *work)
rtnl_unlock();
}
-static int check_missing_comp_in_queue(struct ena_adapter *adapter,
- struct ena_ring *tx_ring)
+static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring)
+{
+ if (likely(rx_ring->first_interrupt))
+ return 0;
+
+ if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
+ return 0;
+
+ rx_ring->no_interrupt_event_cnt++;
+
+ if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
+ netif_err(adapter, rx_err, adapter->netdev,
+ "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
+ rx_ring->qid);
+ adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
+ smp_mb__before_atomic();
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
+ struct ena_ring *tx_ring)
{
struct ena_tx_buffer *tx_buf;
unsigned long last_jiffies;
@@ -2659,8 +2688,27 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
for (i = 0; i < tx_ring->ring_size; i++) {
tx_buf = &tx_ring->tx_buffer_info[i];
last_jiffies = tx_buf->last_jiffies;
- if (unlikely(last_jiffies &&
- time_is_before_jiffies(last_jiffies + adapter->missing_tx_completion_to))) {
+
+ if (last_jiffies == 0)
+ /* no pending Tx at this location */
+ continue;
+
+ if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
+ 2 * adapter->missing_tx_completion_to))) {
+ /* If after graceful period interrupt is still not
+ * received, we schedule a reset
+ */
+ netif_err(adapter, tx_err, adapter->netdev,
+ "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
+ tx_ring->qid);
+ adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
+ smp_mb__before_atomic();
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ return -EIO;
+ }
+
+ if (unlikely(time_is_before_jiffies(last_jiffies +
+ adapter->missing_tx_completion_to))) {
if (!tx_buf->print_once)
netif_notice(adapter, tx_err, adapter->netdev,
"Found a Tx that wasn't completed on time, qid %d, index %d.\n",
@@ -2689,9 +2737,10 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
return rc;
}
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
+ struct ena_ring *rx_ring;
int i, budget, rc;
/* Make sure the driver doesn't turn the device in other process */
@@ -2710,8 +2759,13 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
tx_ring = &adapter->tx_ring[i];
+ rx_ring = &adapter->rx_ring[i];
+
+ rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
+ if (unlikely(rc))
+ return;
- rc = check_missing_comp_in_queue(adapter, tx_ring);
+ rc = check_for_rx_interrupt_queue(adapter, rx_ring);
if (unlikely(rc))
return;
@@ -2870,7 +2924,7 @@ static void ena_timer_service(struct timer_list *t)
check_for_admin_com_state(adapter);
- check_for_missing_tx_completions(adapter);
+ check_for_missing_completions(adapter);
check_for_empty_rx_ring(adapter);