net/mlx4_core: panic the system on unrecoverable errors

author Santosh Shilimkar <santosh.shilimkar@oracle.com>

Wed, 7 Dec 2016 23:06:59 +0000 (15:06 -0800)

committer Chuck Anderson <chuck.anderson@oracle.com>

Tue, 30 May 2017 05:50:46 +0000 (22:50 -0700)
author Santosh Shilimkar <santosh.shilimkar@oracle.com>
Wed, 7 Dec 2016 23:06:59 +0000 (15:06 -0800)
committer Chuck Anderson <chuck.anderson@oracle.com>
Tue, 30 May 2017 05:50:46 +0000 (22:50 -0700)
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c

index 715de8affcc950e0ea18fd706bc8f04542d34a6f..91d8c925b7786452fc1a3728b71d1bc91c9c9084 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -188,6 +188,14 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
         mlx4_err(dev, "device was reset successfully\n");
         mutex_unlock(&persist->device_state_mutex);
  
+       /* Mellanox device reset and recovery has never worked and
+        * in fact ends up hanging the system which needs a hard reboot
+        * of the system. Instead of waiting for recovery which is never
+        * going to happen, just panic the system so that it can capture
+        * all the necessary logs/vmcore and let the node shut down gracefully.
+        */
+       panic("MLX4 device reset due to unrecoverable catastrophic failure\n");
+
         /* At that step HW was already reset, now notify clients */
         mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
         mlx4_cmd_wake_completions(dev);
author	Santosh Shilimkar <santosh.shilimkar@oracle.com>
	Wed, 7 Dec 2016 23:06:59 +0000 (15:06 -0800)
committer	Chuck Anderson <chuck.anderson@oracle.com>
	Tue, 30 May 2017 05:50:46 +0000 (22:50 -0700)