www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
bnx2x: Recovery flow bug fixes
author Ariel Elior <ariele@broadcom.com>
Thu, 26 Jan 2012 06:01:52 +0000 (06:01 +0000)
committer Joe Jin <joe.jin@oracle.com>
Wed, 16 May 2012 14:41:21 +0000 (22:41 +0800)
1. Sample mcp pulse and mcp sequence in nic load instead of in init_one
as they may change by the time we want to use them.

2. Allow cnic to access the device during nic load (by adding a new "LOADING"
state to the recovery flow). This prevents the unnecessary cnic timeout that
resulted from cnic attempting to access the device while the nic is loading,
but being blocked because of the Recovery state.

3. Issue a 'fake' driver load command to the mcp when the last driver unloads,
to prevent the mcp from taking ownership. When recovery is complete, unload the
fake driver to allow the mcp to initialize the hardware before the first driver
loads.

(cherry picked from commit 95c6c6165eaf5a031bcf31606e081c72e4acdeb8)
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Joe Jin <joe.jin@oracle.com>
drivers/net/bnx2x/bnx2x.h
drivers/net/bnx2x/bnx2x_cmn.c
drivers/net/bnx2x/bnx2x_main.c

index 1f599e36e556158569566e686d8ba7e3e2473be5..609a0e95effaeba49164d25986edd17a70c4541c 100644 (file)
@@ -1022,7 +1022,8 @@ enum bnx2x_recovery_state {
        BNX2X_RECOVERY_DONE,
        BNX2X_RECOVERY_INIT,
        BNX2X_RECOVERY_WAIT,
-       BNX2X_RECOVERY_FAILED
+       BNX2X_RECOVERY_FAILED,
+       BNX2X_RECOVERY_NIC_LOADING
 };
 
 /*
index b875cfa062377b2664ce78e03b4d8c96cdff21c7..d00a7f55a38051a9cfd9430457193a7c4abb3b13 100644 (file)
@@ -1742,6 +1742,18 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
         * common blocks should be initialized, otherwise - not
         */
        if (!BP_NOMCP(bp)) {
+               /* init fw_seq */
+               bp->fw_seq =
+                       (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
+                        DRV_MSG_SEQ_NUMBER_MASK);
+               BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
+
+               /* Get current FW pulse sequence */
+               bp->fw_drv_pulse_wr_seq =
+                       (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_pulse_mb) &
+                        DRV_PULSE_SEQ_MASK);
+               BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
+
                load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
                if (!load_code) {
                        BNX2X_ERR("MCP response failure, aborting\n");
@@ -3404,7 +3416,7 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
        struct bnx2x *bp = netdev_priv(dev);
 
        if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
-               pr_err("Handling parity error recovery. Try again later\n");
+               netdev_err(dev, "Handling parity error recovery. Try again later\n");
                return -EAGAIN;
        }
 
@@ -3530,7 +3542,7 @@ int bnx2x_resume(struct pci_dev *pdev)
        bp = netdev_priv(dev);
 
        if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
-               pr_err("Handling parity error recovery. Try again later\n");
+               netdev_err(dev, "Handling parity error recovery. Try again later\n");
                return -EAGAIN;
        }
 
@@ -3546,8 +3558,6 @@ int bnx2x_resume(struct pci_dev *pdev)
        bnx2x_set_power_state(bp, PCI_D0);
        netif_device_attach(dev);
 
-       /* Since the chip was reset, clear the FW sequence number */
-       bp->fw_seq = 0;
        rc = bnx2x_nic_load(bp, LOAD_OPEN);
 
        rtnl_unlock();
index 9c3da418f36a0941be1c707c77c85a9eeba3852f..feb76951b9eaa3455ebc4cae612576c90cf9771e 100644 (file)
@@ -467,7 +467,9 @@ static int bnx2x_issue_dmae_with_comp(struct bnx2x *bp,
        while ((*wb_comp & ~DMAE_PCI_ERR_FLAG) != DMAE_COMP_VAL) {
                DP(BNX2X_MSG_OFF, "wb_comp 0x%08x\n", *wb_comp);
 
-               if (!cnt) {
+               if (!cnt ||
+                   (bp->recovery_state != BNX2X_RECOVERY_DONE &&
+                    bp->recovery_state != BNX2X_RECOVERY_NIC_LOADING)) {
                        BNX2X_ERR("DMAE timeout!\n");
                        rc = DMAE_TIMEOUT;
                        goto unlock;
@@ -8420,13 +8422,38 @@ int bnx2x_leader_reset(struct bnx2x *bp)
 {
        int rc = 0;
        bool global = bnx2x_reset_is_global(bp);
+       u32 load_code;
+
+       /* if not going to reset MCP - load "fake" driver to reset HW while
+        * driver is owner of the HW
+        */
+       if (!global && !BP_NOMCP(bp)) {
+               load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
+               if (!load_code) {
+                       BNX2X_ERR("MCP response failure, aborting\n");
+                       rc = -EAGAIN;
+                       goto exit_leader_reset;
+               }
+               if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) &&
+                   (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) {
+                       BNX2X_ERR("MCP unexpected resp, aborting\n");
+                       rc = -EAGAIN;
+                       goto exit_leader_reset2;
+               }
+               load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
+               if (!load_code) {
+                       BNX2X_ERR("MCP response failure, aborting\n");
+                       rc = -EAGAIN;
+                       goto exit_leader_reset2;
+               }
+       }
 
        /* Try to recover after the failure */
        if (bnx2x_process_kill(bp, global)) {
                netdev_err(bp->dev, "Something bad had happen on engine %d! "
                                    "Aii!\n", BP_PATH(bp));
                rc = -EAGAIN;
-               goto exit_leader_reset;
+               goto exit_leader_reset2;
        }
 
        /*
@@ -8437,6 +8464,12 @@ int bnx2x_leader_reset(struct bnx2x *bp)
        if (global)
                bnx2x_clear_reset_global(bp);
 
+exit_leader_reset2:
+       /* unload "fake driver" if it was loaded */
+       if (!global && !BP_NOMCP(bp)) {
+               bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
+               bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
+       }
 exit_leader_reset:
        bp->is_leader = 0;
        bnx2x_release_leader_lock(bp);
@@ -8473,13 +8506,15 @@ static inline void bnx2x_recovery_failed(struct bnx2x *bp)
 static void bnx2x_parity_recover(struct bnx2x *bp)
 {
        bool global = false;
+       bool is_parity;
 
        DP(NETIF_MSG_HW, "Handling parity\n");
        while (1) {
                switch (bp->recovery_state) {
                case BNX2X_RECOVERY_INIT:
                        DP(NETIF_MSG_HW, "State is BNX2X_RECOVERY_INIT\n");
-                       bnx2x_chk_parity_attn(bp, &global, false);
+                       is_parity = bnx2x_chk_parity_attn(bp, &global, false);
+                       WARN_ON(!is_parity);
 
                        /* Try to get a LEADER_LOCK HW lock */
                        if (bnx2x_trylock_leader_lock(bp)) {
@@ -8503,15 +8538,6 @@ static void bnx2x_parity_recover(struct bnx2x *bp)
 
                        bp->recovery_state = BNX2X_RECOVERY_WAIT;
 
-                       /*
-                        * Reset MCP command sequence number and MCP mail box
-                        * sequence as we are going to reset the MCP.
-                        */
-                       if (global) {
-                               bp->fw_seq = 0;
-                               bp->fw_drv_pulse_wr_seq = 0;
-                       }
-
                        /* Ensure "is_leader", MCP command sequence and
                         * "recovery_state" update values are seen on other
                         * CPUs.
@@ -8595,9 +8621,20 @@ static void bnx2x_parity_recover(struct bnx2x *bp)
                                                return;
                                        }
 
-                                       if (bnx2x_nic_load(bp, LOAD_NORMAL))
-                                               bnx2x_recovery_failed(bp);
-                                       else {
+                                       bp->recovery_state =
+                                               BNX2X_RECOVERY_NIC_LOADING;
+                                       if (bnx2x_nic_load(bp, LOAD_NORMAL)) {
+                                               netdev_err(bp->dev,
+                                                          "Recovery failed. "
+                                                          "Power cycle "
+                                                          "needed\n");
+                                               /* Disconnect this device */
+                                               netif_device_detach(bp->dev);
+                                               /* Shut down the power */
+                                               bnx2x_set_power_state(
+                                                       bp, PCI_D3hot);
+                                               smp_mb();
+                                       } else {
                                                bp->recovery_state =
                                                        BNX2X_RECOVERY_DONE;
                                                smp_mb();
@@ -8851,9 +8888,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 
                        /* restore our func and fw_seq */
                        bp->pf_num = orig_pf_num;
-                       bp->fw_seq =
-                             (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) &
-                               DRV_MSG_SEQ_NUMBER_MASK);
                }
        }
 
@@ -9879,16 +9913,6 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 
        bnx2x_get_cnic_info(bp);
 
-       /* Get current FW pulse sequence */
-       if (!BP_NOMCP(bp)) {
-               int mb_idx = BP_FW_MB_IDX(bp);
-
-               bp->fw_drv_pulse_wr_seq =
-                               (SHMEM_RD(bp, func_mb[mb_idx].drv_pulse_mb) &
-                                DRV_PULSE_SEQ_MASK);
-               BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
-       }
-
        return rc;
 }
 
@@ -10058,14 +10082,6 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
        if (!BP_NOMCP(bp))
                bnx2x_undi_unload(bp);
 
-       /* init fw_seq after undi_unload! */
-       if (!BP_NOMCP(bp)) {
-               bp->fw_seq =
-                       (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
-                        DRV_MSG_SEQ_NUMBER_MASK);
-               BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
-       }
-
        if (CHIP_REV_IS_FPGA(bp))
                dev_err(&bp->pdev->dev, "FPGA detected\n");
 
@@ -11271,13 +11287,6 @@ static void bnx2x_eeh_recover(struct bnx2x *bp)
        if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB))
                != (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB))
                BNX2X_ERR("BAD MCP validity signature\n");
-
-       if (!BP_NOMCP(bp)) {
-               bp->fw_seq =
-                   (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
-                   DRV_MSG_SEQ_NUMBER_MASK);
-               BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
-       }
 }
 
 /**
@@ -11533,6 +11542,13 @@ static int bnx2x_cnic_sp_queue(struct net_device *dev,
                return -EIO;
 #endif
 
+       if ((bp->recovery_state != BNX2X_RECOVERY_DONE) &&
+           (bp->recovery_state != BNX2X_RECOVERY_NIC_LOADING)) {
+               netdev_err(dev, "Handling parity error recovery. Try again "
+                               "later\n");
+               return -EAGAIN;
+       }
+
        spin_lock_bh(&bp->spq_lock);
 
        for (i = 0; i < count; i++) {