From: Bjorn Helgaas Date: Mon, 26 Jun 2023 17:59:56 +0000 (-0500) Subject: Merge branch 'pci/enumeration' X-Git-Tag: dma-mapping-6.6-2023-08-29~244^2~15 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=1abb47390350a1bd5430390e296492e6248865b2;p=users%2Fhch%2Fdma-mapping.git Merge branch 'pci/enumeration' - Add PCI_EXT_CAP_ID_PL_32GT define (Ben Dooks) - Propagate firmware node by calling device_set_node() for better modularity (Andy Shevchenko) - Discover Data Link Layer Link Active Reporting earlier so quirks can take advantage of it (Maciej W. Rozycki) - Use cached Data Link Layer Link Active Reporting capability in pciehp, powerpc/eeh, and mlx5 (Maciej W. Rozycki) - Run quirk for devices that require OS to clear Retrain Link earlier, so later quirks can rely on it (Maciej W. Rozycki) - Export pcie_retrain_link() for use outside ASPM (Maciej W. Rozycki) - Add Data Link Layer Link Active Reporting as another way for pcie_retrain_link() to determine the link is up (Maciej W. Rozycki) - Work around link training failures (especially on the ASMedia ASM2824 switch) by training first at 2.5GT/s and then attempting higher rates (Maciej W. Rozycki) * pci/enumeration: PCI: Add failed link recovery for device reset events PCI: Work around PCIe link training failures PCI: Use pcie_wait_for_link_status() in pcie_wait_for_link_delay() PCI: Add support for polling DLLLA to pcie_retrain_link() PCI: Export pcie_retrain_link() for use outside ASPM PCI: Export PCIe link retrain timeout PCI: Execute quirk_enable_clear_retrain_link() earlier PCI/ASPM: Factor out waiting for link training to complete PCI/ASPM: Avoid unnecessary pcie_link_state use PCI/ASPM: Use distinct local vars in pcie_retrain_link() net/mlx5: Rely on dev->link_active_reporting powerpc/eeh: Rely on dev->link_active_reporting PCI: pciehp: Rely on dev->link_active_reporting PCI: Initialize dev->link_active_reporting earlier PCI: of: Propagate firmware node by calling device_set_node() PCI: Add PCI_EXT_CAP_ID_PL_32GT define # Conflicts: # drivers/pci/pcie/aspm.c --- 1abb47390350a1bd5430390e296492e6248865b2 diff --cc drivers/pci/pci.c index 5ede93222bc1,64f1a87902d8..9afb6114a4cf --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@@ -4856,6 -4875,67 +4875,79 @@@ static int pci_pm_reset(struct pci_dev return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS); } + /** + * pcie_wait_for_link_status - Wait for link status change + * @pdev: Device whose link to wait for. + * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE. + * @active: Waiting for active or inactive? + * - * Return TRUE if successful, or FALSE if status has not changed within ++ * Return 0 if successful, or -ETIMEDOUT if status has not changed within + * PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds. + */ -static bool pcie_wait_for_link_status(struct pci_dev *pdev, - bool use_lt, bool active) ++static int pcie_wait_for_link_status(struct pci_dev *pdev, ++ bool use_lt, bool active) + { + u16 lnksta_mask, lnksta_match; + unsigned long end_jiffies; + u16 lnksta; + + lnksta_mask = use_lt ? PCI_EXP_LNKSTA_LT : PCI_EXP_LNKSTA_DLLLA; + lnksta_match = active ? lnksta_mask : 0; + + end_jiffies = jiffies + msecs_to_jiffies(PCIE_LINK_RETRAIN_TIMEOUT_MS); + do { + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta); + if ((lnksta & lnksta_mask) == lnksta_match) - break; ++ return 0; + msleep(1); + } while (time_before(jiffies, end_jiffies)); - return (lnksta & lnksta_mask) == lnksta_match; ++ ++ return -ETIMEDOUT; + } + + /** + * pcie_retrain_link - Request a link retrain and wait for it to complete + * @pdev: Device whose link to retrain. + * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status. + * + * Retrain completion status is retrieved from the Link Status Register + * according to @use_lt. It is not verified whether the use of the DLLLA + * bit is valid. + * - * Return TRUE if successful, or FALSE if training has not completed ++ * Return 0 if successful, or -ETIMEDOUT if training has not completed + * within PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds. + */ -bool pcie_retrain_link(struct pci_dev *pdev, bool use_lt) ++int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) + { ++ int rc; + u16 lnkctl; + ++ /* ++ * Ensure the updated LNKCTL parameters are used during link ++ * training by checking that there is no ongoing link training to ++ * avoid LTSSM race as recommended in Implementation Note at the ++ * end of PCIe r6.0.1 sec 7.5.3.7. ++ */ ++ rc = pcie_wait_for_link_status(pdev, use_lt, !use_lt); ++ if (rc) ++ return rc; ++ + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnkctl); + lnkctl |= PCI_EXP_LNKCTL_RL; + pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnkctl); + if (pdev->clear_retrain_link) { + /* + * Due to an erratum in some devices the Retrain Link bit + * needs to be cleared again manually to allow the link + * training to succeed. + */ + lnkctl &= ~PCI_EXP_LNKCTL_RL; + pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnkctl); + } + + return pcie_wait_for_link_status(pdev, use_lt, !use_lt); + } + /** * pcie_wait_for_link_delay - Wait until link is active or inactive * @pdev: Bridge device @@@ -4867,9 -4947,7 +4959,7 @@@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay) { - int timeout = 1000; -- bool ret; - u16 lnk_status; ++ int rc; /* * Some controllers might not implement link active reporting. In this @@@ -4891,20 -4969,13 +4981,21 @@@ */ if (active) msleep(20); - for (;;) { - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); - ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); - if (ret == active) - break; - if (timeout <= 0) - break; - msleep(10); - timeout -= 10; - } - ret = pcie_wait_for_link_status(pdev, false, active); - if (active && !ret) - ret = pcie_failed_link_retrain(pdev); -- if (active && ret) ++ rc = pcie_wait_for_link_status(pdev, false, active); ++ if (active) { ++ if (rc) ++ rc = pcie_failed_link_retrain(pdev); ++ if (rc) ++ return false; ++ msleep(delay); ++ return true; ++ } + - return ret; ++ if (rc) ++ return false; + - return ret == active; ++ return true; } /** diff --cc drivers/pci/pci.h index a97a735e6623,e3a468a58cd2..9e9bdff38f6d --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@@ -563,6 -570,7 +570,7 @@@ pci_ers_result_t pcie_do_recovery(struc pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); -bool pcie_retrain_link(struct pci_dev *pdev, bool use_lt); ++int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); diff --cc drivers/pci/pcie/aspm.c index 3aa73ecdf86f,99b8badddea5..3dafba0b5f41 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@@ -308,15 -257,15 +257,15 @@@ static void pcie_aspm_configure_common_ reg16 &= ~PCI_EXP_LNKCTL_CCC; pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); - if (pcie_retrain_link(link)) { - if (pcie_retrain_link(link->pdev, true)) - return; ++ if (pcie_retrain_link(link->pdev, true)) { - /* Training failed. Restore common clock configurations */ - pci_err(parent, "ASPM: Could not configure common clock\n"); - list_for_each_entry(child, &linkbus->devices, bus_list) - pcie_capability_write_word(child, PCI_EXP_LNKCTL, + /* Training failed. Restore common clock configurations */ + pci_err(parent, "ASPM: Could not configure common clock\n"); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_reg[PCI_FUNC(child->devfn)]); - pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); + } } /* Convert L0s latency encoding to ns */ diff --cc drivers/pci/quirks.c index f4e2a88729fd,a46678563b33..1c4f715537c1 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@@ -33,6 -33,99 +33,99 @@@ #include #include "pci.h" + /* + * Retrain the link of a downstream PCIe port by hand if necessary. + * + * This is needed at least where a downstream port of the ASMedia ASM2824 + * Gen 3 switch is wired to the upstream port of the Pericom PI7C9X2G304 + * Gen 2 switch, and observed with the Delock Riser Card PCI Express x1 > + * 2 x PCIe x1 device, P/N 41433, plugged into the SiFive HiFive Unmatched + * board. + * + * In such a configuration the switches are supposed to negotiate the link + * speed of preferably 5.0GT/s, falling back to 2.5GT/s. However the link + * continues switching between the two speeds indefinitely and the data + * link layer never reaches the active state, with link training reported + * repeatedly active ~84% of the time. Forcing the target link speed to + * 2.5GT/s with the upstream ASM2824 device makes the two switches talk to + * each other correctly however. And more interestingly retraining with a + * higher target link speed afterwards lets the two successfully negotiate + * 5.0GT/s. + * + * With the ASM2824 we can rely on the otherwise optional Data Link Layer + * Link Active status bit and in the failed link training scenario it will + * be off along with the Link Bandwidth Management Status indicating that + * hardware has changed the link speed or width in an attempt to correct + * unreliable link operation. For a port that has been left unconnected + * both bits will be clear. So use this information to detect the problem + * rather than polling the Link Training bit and watching out for flips or + * at least the active status. + * + * Since the exact nature of the problem isn't known and in principle this + * could trigger where an ASM2824 device is downstream rather upstream, + * apply this erratum workaround to any downstream ports as long as they + * support Link Active reporting and have the Link Control 2 register. + * Restrict the speed to 2.5GT/s then with the Target Link Speed field, + * request a retrain and wait 200ms for the data link to go up. + * + * If this turns out successful and we know by the Vendor:Device ID it is + * safe to do so, then lift the restriction, letting the devices negotiate + * a higher speed. Also check for a similar 2.5GT/s speed restriction the + * firmware may have already arranged and lift it with ports that already + * report their data link being up. + * + * Return TRUE if the link has been successfully retrained, otherwise FALSE. + */ + bool pcie_failed_link_retrain(struct pci_dev *dev) + { + static const struct pci_device_id ids[] = { + { PCI_VDEVICE(ASMEDIA, 0x2824) }, /* ASMedia ASM2824 */ + {} + }; + u16 lnksta, lnkctl2; + + if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) || + !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting) + return false; + + pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + if ((lnksta & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_DLLLA)) == + PCI_EXP_LNKSTA_LBMS) { + pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); + + lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; + lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT; + pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); + - if (!pcie_retrain_link(dev, false)) { ++ if (pcie_retrain_link(dev, false)) { + pci_info(dev, "retraining failed\n"); + return false; + } + + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + } + + if ((lnksta & PCI_EXP_LNKSTA_DLLLA) && + (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT && + pci_match_id(ids, dev)) { + u32 lnkcap; + + pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n"); + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); + lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; + lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS; + pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); + - if (!pcie_retrain_link(dev, false)) { ++ if (pcie_retrain_link(dev, false)) { + pci_info(dev, "retraining failed\n"); + return false; + } + } + + return true; + } + static ktime_t fixup_debug_start(struct pci_dev *dev, void (*fn)(struct pci_dev *dev)) {