From 5e8a36a541a033c80d11f8f062c03c340e242acd Mon Sep 17 00:00:00 2001
From: Dan Duval
Date: Fri, 2 Sep 2016 12:56:30 -0400
Subject: [PATCH] mptsas: add TUR with retries to ensure LUNs complete initialization

Orabug: 24745062

Earlier versions of the mptsas driver included a mechanism for
executing, and if necessary retrying, SCSI TEST UNIT READY commands
to ensure that devices complete their initialization during device
discovery.  This functionality, present in UEK2, was never sent
upstream, and was lost when UEK4 was initiated.

We have been seeing flash devices returning errors, or simply
disappearing, during alter cell validate configuration operations
on Exadata systems.  Giving the flash disks time to initialize after (re-)
discovery appears to resolve this issue.

This commit simply restores the missing functionality.

Signed-off-by: Dan Duval
Reviewed-by: Martin K. Petersen
---
 drivers/message/fusion/mptsas.c   | 259 +++++++++++++++++++++++++++++-
 drivers/message/fusion/mptsas.h   |  30 ++++
 drivers/message/fusion/mptscsih.c |   5 +-
 drivers/message/fusion/mptscsih.h |   1 +
 4 files changed, 292 insertions(+), 3 deletions(-)

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 5bdaae15a7425..0dcd0f3cf7efb 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -97,6 +97,11 @@ module_param(mpt_loadtime_max_sectors, int, 0);
 MODULE_PARM_DESC(mpt_loadtime_max_sectors,
 		" Maximum sector define for Host Bus Adaptor.Range 64 to 8192 default=8192");
 
+static int mpt_cmd_retry_count = 300;
+module_param(mpt_cmd_retry_count, int, 0);
+MODULE_PARM_DESC(mpt_cmd_retry_count,
+		" Device discovery TUR command retry count: default=300");
+
 static u8	mptsasDoneCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasTaskCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasInternalCtx = MPT_MAX_PROTOCOL_DRIVERS; /* Used only for internal commands */
@@ -1338,6 +1343,7 @@ enum device_state{
 	DEVICE_RETRY,
 	DEVICE_ERROR,
 	DEVICE_READY,
+	DEVICE_START_UNIT,
 };
 
 static int
@@ -1680,7 +1686,246 @@ mptsas_firmware_event_work(struct work_struct *work)
 	}
 }
 
+/**
+ * mptsas_get_lun_number - returns the first entry in report_luns table
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @channel: firmware channel of the target (copied into the internal command)
+ * @id: firmware target id (copied into the internal command)
+ * @lun: output; set to the first LUN of the REPORT LUNS reply, else untouched
+ * Returns MPT_SCANDV_GOOD on success, a negative value on any failure.
+ */
+static int
+mptsas_get_lun_number(MPT_ADAPTER *ioc, u8 channel, u8 id, int *lun)
+{
+	INTERNAL_CMD	*iocmd;
+	struct scsi_lun	*lun_data;
+	dma_addr_t	lun_data_dma;
+	u32		lun_data_len;
+	u8		*data;
+	MPT_SCSI_HOST	*hd;
+	int		rc;
+	u32		length, num_luns;
+
+	iocmd = NULL;
+	hd = shost_priv(ioc->sh);
+	lun_data_len = (255 * sizeof(struct scsi_lun));
+	lun_data = pci_alloc_consistent(ioc->pcidev, lun_data_len,
+	    &lun_data_dma);
+	if (!lun_data) {
+		printk(MYIOC_s_ERR_FMT "%s: pci_alloc_consistent(%u) FAILED!\n",
+		    ioc->name, __FUNCTION__, lun_data_len);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	iocmd = kzalloc(sizeof(INTERNAL_CMD), GFP_KERNEL);
+	if (!iocmd) {
+		printk(MYIOC_s_ERR_FMT "%s: kzalloc(%zd) FAILED!\n",
+		    ioc->name, __FUNCTION__, sizeof(INTERNAL_CMD));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Report Luns
+	 */
+	iocmd->cmd = REPORT_LUNS;
+	iocmd->data_dma = lun_data_dma;
+	iocmd->data = (u8 *)lun_data;
+	iocmd->size = lun_data_len;
+	iocmd->channel = channel;
+	iocmd->id = id;
+
+	if ((rc = mptscsih_do_cmd(hd, iocmd)) < 0) {
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d: "
+		    "report_luns failed due to rc=0x%x\n", ioc->name,
+		    __FUNCTION__, channel, id, rc);
+		goto out;
+	}
+
+	if (rc != MPT_SCANDV_GOOD) {
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d: "
+		    "report_luns failed due to rc=0x%x\n", ioc->name,
+		    __FUNCTION__, channel, id, rc);
+		rc = -rc;
+		goto out;
+	}
+
+	data = (u8 *)lun_data;
+	length = ((data[0] << 24) | (data[1] << 16) |
+	    (data[2] << 8) | (data[3] << 0));
+
+	num_luns = (length / sizeof(struct scsi_lun));
+	if (!num_luns)
+		goto out;
+	/* return 1st lun in the list */
+	*lun = mpt_scsilun_to_int(&lun_data[1]);
+
+#if 0
+	/* some debugging, left commented out */
+	{
+		struct scsi_lun *lunp;
+		for (lunp = &lun_data[1]; lunp <= &lun_data[num_luns]; lunp++)
+			printk("%x\n", scsilun_to_int(lunp));
+	}
+#endif
+ out:
+	if (lun_data)
+		pci_free_consistent(ioc->pcidev, lun_data_len, lun_data,
+		    lun_data_dma);
+	kfree(iocmd);
+	return rc;
+}
+
+/**
+ * mptsas_test_unit_ready - send TEST UNIT READY and classify the result
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @channel: firmware channel of the target (copied into the internal command)
+ * @id: firmware target id (copied into the internal command)
+ * @count: retry count
+ * Returns DEVICE_READY, DEVICE_RETRY or DEVICE_ERROR.
+ */
+enum device_state
+mptsas_test_unit_ready(MPT_ADAPTER *ioc, u8 channel, u8 id, u16 count)
+{
+	INTERNAL_CMD	*iocmd;
+	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
+	enum device_state state;
+	int		rc;
+	u8		skey, asc, ascq;
+	u8		retry_ua;
+
+	if (count >= mpt_cmd_retry_count)
+		return DEVICE_ERROR;
+
+	retry_ua = 0;
+	iocmd = kzalloc(sizeof(INTERNAL_CMD), GFP_KERNEL);
+	if (!iocmd) {
+		printk(MYIOC_s_ERR_FMT "%s: kzalloc(%zd) FAILED!\n",
+		    ioc->name, __FUNCTION__, sizeof(INTERNAL_CMD));
+		return DEVICE_ERROR;
+	}
+
+	state = DEVICE_ERROR;
+	iocmd->cmd = TEST_UNIT_READY;
+	iocmd->data_dma = -1;
+	iocmd->data = NULL;
+
+	if (mptscsih_is_phys_disk(ioc, channel, id)) {
+		iocmd->flags |= MPT_ICFLAG_PHYS_DISK;
+		iocmd->physDiskNum = mptscsih_raid_id_to_num(ioc, channel, id);
+	}
+	iocmd->channel = channel;
+	iocmd->id = id;
+
+ retry:
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: fw_channel=%d "
+	    "fw_id=%d retry=%d\n", ioc->name, __FUNCTION__, channel, id, count));
+	rc = mptscsih_do_cmd(hd, iocmd);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: rc=0x%02x\n",
+	    ioc->name, __FUNCTION__, rc));
+	if (rc < 0) {
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d: "
+		    "tur failed due to timeout\n", ioc->name,
+		    __FUNCTION__, channel, id);
+		goto tur_done;
+	}
+
+	switch(rc) {
+	case MPT_SCANDV_GOOD:
+		state = DEVICE_READY;
+		goto tur_done;
+	case MPT_SCANDV_BUSY:
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: "
+		    "fw_channel=%d fw_id=%d : device busy\n",
+		    ioc->name, __FUNCTION__, channel, id));
+		state = DEVICE_RETRY;
+		break;
+	case MPT_SCANDV_DID_RESET:
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: "
+		    "fw_channel=%d fw_id=%d : did reset\n",
+		    ioc->name, __FUNCTION__, channel, id));
+		state = DEVICE_RETRY;
+		break;
+	case MPT_SCANDV_SENSE:
+		skey = ioc->internal_cmds.sense[2] & 0x0F;
+		asc = ioc->internal_cmds.sense[12];
+		ascq = ioc->internal_cmds.sense[13];
+
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: "
+		    "fw_channel=%d fw_id=%d : [sense_key,asc,"
+		    "ascq]: [0x%02x,0x%02x,0x%02x]\n", ioc->name,
+		    __FUNCTION__, channel, id, skey, asc, ascq));
+
+		if (skey == UNIT_ATTENTION) {
+			state = DEVICE_RETRY;
+			break;
+		} else if (skey == NOT_READY) {
+			/* medium isn't present */
+			if (asc == 0x3a) {
+				state = DEVICE_READY;
+				goto tur_done;
+			}
+			/* LOGICAL UNIT NOT READY */
+			else if (asc == 0x04) {
+				if (ascq == 0x03 ||
+				    ascq == 0x0b ||
+				    ascq == 0x0c) {
+					state = DEVICE_ERROR;
+				} else {
+					state = DEVICE_START_UNIT;
+					break;
+				}
+			}
+			/* LOGICAL UNIT HAS NOT SELF-CONFIGURED YET */
+			else if (asc == 0x3e && !ascq) {
+				state = DEVICE_START_UNIT;
+				break;
+			}
+		} else if (skey == ILLEGAL_REQUEST) {
+			/* try sending a tur to a non-zero lun number */
+			if (!iocmd->lun && !mptsas_get_lun_number(ioc,
+			    channel, id, &iocmd->lun) && iocmd->lun)
+				goto retry;
+		}
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d : "
+		    "tur failed due to [sense_key,asc,ascq]: "
+		    "[0x%02x,0x%02x,0x%02x]\n", ioc->name,
+		    __FUNCTION__, channel, id, skey, asc, ascq);
+		goto tur_done;
+	case MPT_SCANDV_SELECTION_TIMEOUT:
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d: "
+		    "tur failed due to no device\n", ioc->name,
+		    __FUNCTION__, channel,
+		    id);
+		goto tur_done;
+	case MPT_SCANDV_SOME_ERROR:
+		printk(MYIOC_s_ERR_FMT "%s: fw_channel=%d fw_id=%d: "
+		    "tur failed due to some error\n", ioc->name,
+		    __FUNCTION__,
+		    channel, id);
+		goto tur_done;
+	default:
+		printk(MYIOC_s_ERR_FMT
+		    "%s: fw_channel=%d fw_id=%d: tur failed due to "
+		    "unknown rc=0x%02x\n", ioc->name, __FUNCTION__,
+		    channel, id, rc );
+		goto tur_done;
+	}
+ tur_done:
+	/* Try Sending START_STOP scsi command */
+	if(state == DEVICE_START_UNIT) {
+		iocmd->cmd = START_STOP;
+		rc = mptscsih_do_cmd(hd, iocmd);
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: rc=0x%02x\n",
+		    ioc->name, __FUNCTION__, rc));
+		/* No need to check return value rc, since TUR is going to be retried */
+		state = DEVICE_RETRY;
+	}
+	kfree(iocmd);
+	return state;
+}
 
 
 static int
 mptsas_slave_configure(struct scsi_device *sdev)
@@ -3950,9 +4195,11 @@ mptsas_probe_expanders(MPT_ADAPTER *ioc)
 static void
 mptsas_probe_devices(MPT_ADAPTER *ioc)
 {
+	u16 retry_count;
 	u16 handle;
 	struct mptsas_devinfo sas_device;
 	struct mptsas_phyinfo *phy_info;
+	enum device_state state;
 
 	handle = 0xFFFF;
 	while (!(mptsas_sas_device_pg0(ioc, &sas_device,
@@ -3980,7 +4227,17 @@ mptsas_probe_devices(MPT_ADAPTER *ioc)
 		if (mptsas_get_rphy(phy_info))
 			continue;
 
-		mptsas_add_end_device(ioc, phy_info);
+		state = DEVICE_RETRY;
+		retry_count = 0;
+		while(state == DEVICE_RETRY) {
+			state = mptsas_test_unit_ready(ioc, sas_device.channel,
+			    sas_device.id, retry_count++);
+			ssleep(1);
+		}
+		if (state == DEVICE_READY)
+			mptsas_add_end_device(ioc, phy_info);
+		else
+			memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
 	}
 }
 
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index c396483d36245..c97092ed7128a 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -188,5 +188,35 @@ struct mptsas_enclosure {
 	u8	sep_channel;		/* SEP channel logical channel id */
 };
 
+/**
+ * mpt_scsilun_to_int: convert a scsi_lun to an int
+ * @scsilun:	struct scsi_lun to be converted.
+ *
+ * Description:
+ *     Convert @scsilun from a struct scsi_lun to a four byte host byte-ordered
+ *     integer, and return the result. The caller must check for
+ *     truncation before using this function.
+ *
+ * Notes:
+ *     The struct scsi_lun is assumed to be four levels, with each level
+ *     effectively containing a SCSI byte-ordered (big endian) short; the
+ *     addressing bits of each level are ignored (the highest two bits).
+ *     For a description of the LUN format, post SCSI-3 see the SCSI
+ *     Architecture Model, for SCSI-3 see the SCSI Controller Commands.
+ *
+ *     Given a struct scsi_lun of: 0a 04 0b 03 00 00 00 00, this function returns
+ *     the integer: 0x0b030a04
+ **/
+static inline int mpt_scsilun_to_int(struct scsi_lun *scsilun)
+{
+	unsigned int i;
+	unsigned int lun;
+
+	lun = 0;
+	for (i = 0; i < sizeof(lun); i += 2)
+		lun = lun | ((((unsigned int)scsilun->scsi_lun[i] << 8) |
+			      scsilun->scsi_lun[i + 1]) << (i * 8));
+	return lun;
+}
 /*}-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 #endif
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 6c9fc11efb872..420cdd94785e1 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -105,7 +105,7 @@ mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
 static int	mptscsih_get_completion_code(MPT_ADAPTER *ioc,
 		MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
 int		mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r);
-static int	mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
+int		mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 static void	mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, VirtDevice *vdevice);
 
 static int
@@ -2777,7 +2777,7 @@ mptscsih_get_completion_code(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
  *
  *		> 0 if command complete but some type of completion error.
  */
-static int
+int
 mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 {
 	MPT_FRAME_HDR	*mf;
@@ -3000,6 +3000,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 	mutex_unlock(&ioc->internal_cmds.mutex);
 	return ret;
 }
+EXPORT_SYMBOL(mptscsih_do_cmd);
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 2baeefd9be7a2..e18f97ad23e4b 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -131,6 +131,7 @@ extern int mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
+extern int mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 extern struct device_attribute *mptscsih_host_attrs[];
 extern struct scsi_cmnd *mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
 extern void mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
-- 
2.50.1