From 0fe0f65ca16a0d1f68de7c60f09c10b09b0e7f22 Mon Sep 17 00:00:00 2001
From: Ashok Vairavan
Date: Wed, 23 Nov 2016 14:31:21 -0800
Subject: [PATCH] NVMe: reduce queue depth as workaround for Samsung EPIC SQ errata

Orabug: 25138123

Oracle discovered that the NVMe driver gets SQ completion errors
eventually leading to the device being reset, taken out of the PCI bus
tree or kernel panics when using the default SQ size of 1024 entries
(64KB) for Samsung EPIC NVMe SSDs.

PCIe analyzer tracing by Oracle and Samsung revealed an erratum in
Samsung's firmware for EPIC SSDs where these invalid completion entries
can occur when the queues straddle an 8MB DMA address boundary.

This patch works around the erratum by detecting these specific devices
and limiting their descriptor queue depth to 64. This is only for the
Samsung NVMe controllers used in Oracle X-series servers.

There was no noticeable performance impact of reducing queue depths to
64 for these Samsung drives, Oracle X6-2 server, and Oracle VM Server
3.4.2.

Signed-off-by: Kyle Fortin
Signed-off-by: Bhavesh Davda
Reviewed-by: Martin K. Petersen
---
 drivers/nvme/host/pci.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4bf792ee8383..73619d6c3d9b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1684,6 +1684,17 @@ static int nvme_dev_map(struct nvme_dev *dev)
 			dev->q_depth);
 	}
 
+	/*
+	 * Limit q_depth for Samsung NVMe SSDs. The depth and descriptor
+	 * size should not span a DMA page (64 x 64B) unless NVME_CAP_MQES(cap)
+	 * already restricted it further.
+	 */
+	if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG && pdev->device == 0xa821 ) {
+		dev->q_depth = min_t(int, dev->q_depth, 64);
+		dev_warn(dev->dev, "detected Samsung NVMe controller, limit "
+			"queue depth=%u.\n", dev->q_depth);
+	}
+
 	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
 		dev->cmb = nvme_map_cmb(dev);
 
-- 
2.50.1