return (swab32(status) >> 31) != priv->cmd.comm_toggle;
 }
 
-static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        u32 val;
 
+       /* To avoid writing to unknown addresses after the device state was
+        * changed to internal error and the function was reset,
+        * check the INTERNAL_ERROR flag which is updated under
+        * device_state_mutex lock.
+        */
+       mutex_lock(&dev->persist->device_state_mutex);
+
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+               mutex_unlock(&dev->persist->device_state_mutex);
+               return -EIO;
+       }
+
        priv->cmd.comm_toggle ^= 1;
        val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
        __raw_writel((__force u32) cpu_to_be32(val),
                     &priv->mfunc.comm->slave_write);
        mmiowb();
+       mutex_unlock(&dev->persist->device_state_mutex);
+       return 0;
 }
 
 static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
 
        /* Write command */
        down(&priv->cmd.poll_sem);
-       mlx4_comm_cmd_post(dev, cmd, param);
+       if (mlx4_comm_cmd_post(dev, cmd, param)) {
+               /* Only in case the device state is INTERNAL_ERROR,
+                * mlx4_comm_cmd_post returns with an error
+                */
+               err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+               goto out;
+       }
 
        end = msecs_to_jiffies(timeout) + jiffies;
        while (comm_pending(dev) && time_before(jiffies, end))
                 * is MLX4_DELAY_RESET_SLAVE*/
                if ((MLX4_COMM_CMD_RESET == cmd)) {
                        err = MLX4_DELAY_RESET_SLAVE;
+                       goto out;
                } else {
-                       mlx4_warn(dev, "Communication channel timed out\n");
-                       err = -ETIMEDOUT;
+                       mlx4_warn(dev, "Communication channel command 0x%x timed out\n",
+                                 cmd);
+                       err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
                }
        }
 
+       if (err)
+               mlx4_enter_error_state(dev->persist);
+out:
        up(&priv->cmd.poll_sem);
        return err;
 }
 
-static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
-                             u16 param, unsigned long timeout)
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd,
+                             u16 param, u16 op, unsigned long timeout)
 {
        struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
        struct mlx4_cmd_context *context;
 
        reinit_completion(&context->done);
 
-       mlx4_comm_cmd_post(dev, op, param);
+       if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) {
+               /* Only in case the device state is INTERNAL_ERROR,
+                * mlx4_comm_cmd_post returns with an error
+                */
+               err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+               goto out;
+       }
 
        if (!wait_for_completion_timeout(&context->done,
                                         msecs_to_jiffies(timeout))) {
-               mlx4_warn(dev, "communication channel command 0x%x timed out\n",
-                         op);
-               err = -EBUSY;
-               goto out;
+               mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n",
+                         vhcr_cmd, op);
+               goto out_reset;
        }
 
        err = context->result;
        if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
                mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
-                        op, context->fw_status);
-               goto out;
+                        vhcr_cmd, context->fw_status);
+               if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+                       goto out_reset;
        }
 
-out:
        /* wait for comm channel ready
         * this is necessary for prevention the race
         * when switching between event to polling mode
+        * Skip this section when the device is in INTERNAL_ERROR state:
+        * in this state, no commands are sent via the comm channel until
+        * the device has returned from reset.
         */
-       end = msecs_to_jiffies(timeout) + jiffies;
-       while (comm_pending(dev) && time_before(jiffies, end))
-               cond_resched();
+       if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+               end = msecs_to_jiffies(timeout) + jiffies;
+               while (comm_pending(dev) && time_before(jiffies, end))
+                       cond_resched();
+       }
+       goto out;
 
+out_reset:
+       err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+       mlx4_enter_error_state(dev->persist);
+out:
        spin_lock(&cmd->context_lock);
        context->next = cmd->free_head;
        cmd->free_head = context - cmd->context;
 }
 
 int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
-                 unsigned long timeout)
+                 u16 op, unsigned long timeout)
 {
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+               return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
        if (mlx4_priv(dev)->cmd.use_events)
-               return mlx4_comm_cmd_wait(dev, cmd, param, timeout);
+               return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout);
        return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
 }
 
                        }
                        ret = mlx4_status_to_errno(vhcr->status);
                }
+               if (ret &&
+                   dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                       ret = mlx4_internal_err_ret_value(dev, op, op_modifier);
        } else {
-               ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0,
+               ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op,
                                    MLX4_COMM_TIME + timeout);
                if (!ret) {
                        if (out_is_imm) {
                                }
                        }
                        ret = mlx4_status_to_errno(vhcr->status);
-               } else
-                       mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n",
-                                op);
+               } else {
+                       if (dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                               ret = mlx4_internal_err_ret_value(dev, op,
+                                                                 op_modifier);
+                       else
+                               mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op);
+               }
        }
 
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
                                      ALIGN(sizeof(struct mlx4_vhcr_cmd),
                                            MLX4_ACCESS_MEM_ALIGN), 1);
                if (ret) {
-                       mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
-                                __func__, ret);
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
+                                        __func__, ret);
                        kfree(vhcr);
                        return ret;
                }
                        goto out_status;
                }
 
-               if (mlx4_ACCESS_MEM(dev, inbox->dma, slave,
-                                   vhcr->in_param,
-                                   MLX4_MAILBOX_SIZE, 1)) {
-                       mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
-                                __func__, cmd->opcode);
+               ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+                                     vhcr->in_param,
+                                     MLX4_MAILBOX_SIZE, 1);
+               if (ret) {
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
+                                        __func__, cmd->opcode);
                        vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
                        goto out_status;
                }
        }
 
        if (err) {
-               mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
-                         vhcr->op, slave, vhcr->errno, err);
+               if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                       mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
+                                 vhcr->op, slave, vhcr->errno, err);
                vhcr_cmd->status = mlx4_errno_to_status(err);
                goto out_status;
        }
                        /* If we failed to write back the outbox after the
                         *command was successfully executed, we must fail this
                         * slave, as it is now in undefined state */
-                       mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
                        goto out;
                }
        }
 
        struct mlx4_priv *priv = mlx4_priv(dev);
 
        mutex_lock(&priv->cmd.slave_cmd_mutex);
-       if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+       if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
+                         MLX4_COMM_TIME))
                mlx4_warn(dev, "Failed to close slave function\n");
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
 }
        mlx4_reset_vf_support(dev);
        mlx4_warn(dev, "Sending reset\n");
        ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
-                                      MLX4_COMM_TIME);
+                                      MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
        /* if we are in the middle of flr the slave will try
         * NUM_OF_RESET_RETRIES times before leaving.*/
        if (ret_from_reset) {
 
        mlx4_warn(dev, "Sending vhcr0\n");
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
-                                                   MLX4_COMM_TIME))
+                            MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
-                                                   MLX4_COMM_TIME))
+                            MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
-                                                   MLX4_COMM_TIME))
+                            MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
                goto err;
-       if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+       if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
+                         MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
                goto err;
 
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
        return 0;
 
 err:
-       mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+       mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
 err_offline:
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
        return -EIO;