From 57ed93fe799b3564388fd61279fc274ae2716508 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:38 +0900 Subject: [PATCH 01/16] PCI: rockchip-ep: Improve rockchip_pcie_ep_unmap_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is no need to loop over all regions to find the memory window used to map an address. We can use rockchip_ob_region() to determine the region index, together with a check that the address passed as argument is the address used to create the mapping. Furthermore, the ob_region_map bitmap should also be checked to ensure that we are not attempting to unmap an address that is not mapped. Link: https://lore.kernel.org/r/20241017015849.190271-4-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 65014681d859..b4db5459727f 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -259,13 +259,9 @@ static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn, { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u32 r; - - for (r = 0; r < ep->max_regions; r++) - if (ep->ob_addr[r] == addr) - break; + u32 r = rockchip_ob_region(addr); - if (r == ep->max_regions) + if (addr != ep->ob_addr[r] || !test_bit(r, &ep->ob_region_map)) return; rockchip_pcie_clear_ep_ob_atu(rockchip, r); -- 2.51.0 From d8dbd21cfafd653b02ec187e59e0bcfaebd9a884 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:39 +0900 Subject: [PATCH 02/16] PCI: rockchip-ep: Improve rockchip_pcie_ep_map_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add a check to verify that the outbound region to be used for mapping an address is not already in use. Link: https://lore.kernel.org/r/20241017015849.190271-5-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index b4db5459727f..24ee1df17a49 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -246,6 +246,9 @@ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn, struct rockchip_pcie *pcie = &ep->rockchip; u32 r = rockchip_ob_region(addr); + if (test_bit(r, &ep->ob_region_map)) + return -EBUSY; + rockchip_pcie_prog_ep_ob_atu(pcie, fn, r, addr, pci_addr, size); set_bit(r, &ep->ob_region_map); -- 2.51.0 From b21255326db2d736089b3df26c3cd4e4a131d75f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:40 +0900 Subject: [PATCH 03/16] PCI: rockchip-ep: Implement the pci_epc_ops::align_addr() operation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The Rockchip PCIe endpoint controller handles PCIe transfers addresses by masking the lower bits of the programmed PCI address and using the same number of lower bits from the CPU address space used for the mapping. For a PCI mapping of size bytes starting from pci_addr, the number of bits masked is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1], up to 20 bits, that is, up to 1MB mappings. This means that when preparing a PCI address mapping, an endpoint function driver must use an offset into the allocated controller memory region that is equal to the mask of the starting PCI address over rockchip_pcie_ep_ob_atu_num_bits() bits. This offset also determines the maximum size of the mapping given the starting PCI address and the fixed 1MB controller memory window size. Implement the ->align_addr() endpoint controller operation to allow the mapping alignment to be transparently handled by endpoint function drivers through the function pci_epc_mem_map(). Link: https://lore.kernel.org/r/20241017015849.190271-6-dlemoal@kernel.org Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal [kwilczynski: change local variable name for address offset] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 23 +++++++++++++++++++++++ drivers/pci/controller/pcie-rockchip.h | 5 +++++ 2 files changed, 28 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 24ee1df17a49..5a4279591f79 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -238,6 +238,28 @@ static inline u32 rockchip_ob_region(phys_addr_t addr) return (addr >> ilog2(SZ_1M)) & 0x1f; } +static u64 rockchip_pcie_ep_align_addr(struct pci_epc *epc, u64 pci_addr, + size_t *pci_size, size_t *addr_offset) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + size_t size = *pci_size; + u64 offset, mask; + int num_bits; + + num_bits = rockchip_pcie_ep_ob_atu_num_bits(&ep->rockchip, + pci_addr, size); + mask = (1ULL << num_bits) - 1; + + offset = pci_addr & mask; + if (size + offset > SZ_1M) + size = SZ_1M - offset; + + *pci_size = ALIGN(offset + size, ROCKCHIP_PCIE_AT_SIZE_ALIGN); + *addr_offset = offset; + + return pci_addr & ~mask; +} + static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn, phys_addr_t addr, u64 pci_addr, size_t size) @@ -461,6 +483,7 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .write_header = rockchip_pcie_ep_write_header, .set_bar = rockchip_pcie_ep_set_bar, .clear_bar = rockchip_pcie_ep_clear_bar, + .align_addr = rockchip_pcie_ep_align_addr, .map_addr = rockchip_pcie_ep_map_addr, .unmap_addr = rockchip_pcie_ep_unmap_addr, .set_msi = rockchip_pcie_ep_set_msi, diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 02368ce9bd54..30398156095f 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -241,6 +241,11 @@ #define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK GENMASK(15, 8) #define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1 #define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3 + +#define ROCKCHIP_PCIE_AT_MIN_NUM_BITS 8 +#define ROCKCHIP_PCIE_AT_MAX_NUM_BITS 20 +#define ROCKCHIP_PCIE_AT_SIZE_ALIGN (1UL << ROCKCHIP_PCIE_AT_MIN_NUM_BITS) + #define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) \ (PCIE_EP_PF_CONFIG_REGS_BASE + (((fn) << 12) & GENMASK(19, 12))) #define ROCKCHIP_PCIE_EP_VIRT_FUNC_BASE(fn) \ -- 2.51.0 From 9f737cca6c54edd0af0a29bb0a702837f1fc9645 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:41 +0900 Subject: [PATCH 04/16] PCI: rockchip-ep: Fix MSI IRQ data mapping MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The call to rockchip_pcie_prog_ep_ob_atu() used to map the PCI address of MSI data to the memory window allocated on probe for IRQs is done in rockchip_pcie_ep_send_msi_irq() assuming a fixed alignment to a 256B boundary of the PCI address. This is not correct as the alignment constraint for the RK3399 PCI mapping depends on the number of bits of address changing in the mapped region. This leads to an unstable system which sometimes work and sometimes does not (crashing on paging faults when memcpy_toio() or memcpy_fromio() are used). Similar to regular data mapping, the MSI data mapping must thus be handled according to the information provided by rockchip_pcie_ep_align_addr(). Modify rockchip_pcie_ep_send_msi_irq() to use rockchip_pcie_ep_align_addr() to correctly program entry 0 of the ATU for sending MSI IRQs. Link: https://lore.kernel.org/r/20241017015849.190271-7-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 5a4279591f79..299a8a95aab7 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -382,9 +382,10 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, { struct rockchip_pcie *rockchip = &ep->rockchip; u32 flags, mme, data, data_mask; + size_t irq_pci_size, offset; + u64 irq_pci_addr; u8 msi_count; u64 pci_addr; - u32 r; /* Check MSI enable bit */ flags = rockchip_pcie_read(&ep->rockchip, @@ -420,18 +421,21 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, PCI_MSI_ADDRESS_LO); /* Set the outbound region if needed. */ - if (unlikely(ep->irq_pci_addr != (pci_addr & PCIE_ADDR_MASK) || + irq_pci_size = ~PCIE_ADDR_MASK + 1; + irq_pci_addr = rockchip_pcie_ep_align_addr(ep->epc, + pci_addr & PCIE_ADDR_MASK, + &irq_pci_size, &offset); + if (unlikely(ep->irq_pci_addr != irq_pci_addr || ep->irq_pci_fn != fn)) { - r = rockchip_ob_region(ep->irq_phys_addr); - rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r, - ep->irq_phys_addr, - pci_addr & PCIE_ADDR_MASK, - ~PCIE_ADDR_MASK + 1); - ep->irq_pci_addr = (pci_addr & PCIE_ADDR_MASK); + rockchip_pcie_prog_ep_ob_atu(rockchip, fn, + rockchip_ob_region(ep->irq_phys_addr), + ep->irq_phys_addr, + irq_pci_addr, irq_pci_size); + ep->irq_pci_addr = irq_pci_addr; ep->irq_pci_fn = fn; } - writew(data, ep->irq_cpu_addr + (pci_addr & ~PCIE_ADDR_MASK)); + writew(data, ep->irq_cpu_addr + offset + (pci_addr & ~PCIE_ADDR_MASK)); return 0; } -- 2.51.0 From 2968534e63e5bbe3d3f3c317ba32012393ee6637 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:42 +0900 Subject: [PATCH 05/16] PCI: rockchip-ep: Rename rockchip_pcie_parse_ep_dt() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit To be consistent with the usual "get_resources" naming of driver functions that acquire controller resources like clocks, PHY etc, rename the function rockchip_pcie_parse_ep_dt() to rockchip_pcie_ep_get_resources(). No functional changes. Link: https://lore.kernel.org/r/20241017015849.190271-8-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 299a8a95aab7..c7c4afe1ef69 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -497,8 +497,8 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .get_features = rockchip_pcie_ep_get_features, }; -static int rockchip_pcie_parse_ep_dt(struct rockchip_pcie *rockchip, - struct rockchip_pcie_ep *ep) +static int rockchip_pcie_ep_get_resources(struct rockchip_pcie *rockchip, + struct rockchip_pcie_ep *ep) { struct device *dev = rockchip->dev; int err; @@ -560,7 +560,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) ep->epc = epc; epc_set_drvdata(epc, ep); - err = rockchip_pcie_parse_ep_dt(rockchip, ep); + err = rockchip_pcie_ep_get_resources(rockchip, ep); if (err) return err; -- 2.51.0 From 945648019466db06dca189a639af913f2834ee9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:43 +0900 Subject: [PATCH 06/16] PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() memory allocations MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Introduce the function rockchip_pcie_ep_init_ob_mem() allocate the outbound memory regions and memory needed for IRQ handling. These changes tidy up rockchip_pcie_ep_probe(). No functional change. Link: https://lore.kernel.org/r/20241017015849.190271-9-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 107 ++++++++++++---------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index c7c4afe1ef69..d497f880eb2c 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -532,15 +532,66 @@ static const struct of_device_id rockchip_pcie_ep_of_match[] = { {}, }; +static int rockchip_pcie_ep_init_ob_mem(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + struct pci_epc_mem_window *windows = NULL; + int err, i; + + ep->ob_addr = devm_kcalloc(dev, ep->max_regions, sizeof(*ep->ob_addr), + GFP_KERNEL); + + if (!ep->ob_addr) + return -ENOMEM; + + windows = devm_kcalloc(dev, ep->max_regions, + sizeof(struct pci_epc_mem_window), GFP_KERNEL); + if (!windows) + return -ENOMEM; + + for (i = 0; i < ep->max_regions; i++) { + windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i); + windows[i].size = SZ_1M; + windows[i].page_size = SZ_1M; + } + err = pci_epc_multi_mem_init(ep->epc, windows, ep->max_regions); + devm_kfree(dev, windows); + + if (err < 0) { + dev_err(dev, "failed to initialize the memory space\n"); + return err; + } + + ep->irq_cpu_addr = pci_epc_mem_alloc_addr(ep->epc, &ep->irq_phys_addr, + SZ_1M); + if (!ep->irq_cpu_addr) { + dev_err(dev, "failed to reserve memory space for MSI\n"); + goto err_epc_mem_exit; + } + + ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; + + return 0; + +err_epc_mem_exit: + pci_epc_mem_exit(ep->epc); + + return err; +} + +static void rockchip_pcie_ep_exit_ob_mem(struct rockchip_pcie_ep *ep) +{ + pci_epc_mem_exit(ep->epc); +} + static int rockchip_pcie_ep_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct rockchip_pcie_ep *ep; struct rockchip_pcie *rockchip; struct pci_epc *epc; - size_t max_regions; - struct pci_epc_mem_window *windows = NULL; - int err, i; + int err; u32 cfg_msi, cfg_msix_cp; ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL); @@ -564,10 +615,14 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) if (err) return err; - err = rockchip_pcie_enable_clocks(rockchip); + err = rockchip_pcie_ep_init_ob_mem(ep); if (err) return err; + err = rockchip_pcie_enable_clocks(rockchip); + if (err) + goto err_exit_ob_mem; + err = rockchip_pcie_init_port(rockchip); if (err) goto err_disable_clocks; @@ -576,47 +631,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, PCIE_CLIENT_CONFIG); - max_regions = ep->max_regions; - ep->ob_addr = devm_kcalloc(dev, max_regions, sizeof(*ep->ob_addr), - GFP_KERNEL); - - if (!ep->ob_addr) { - err = -ENOMEM; - goto err_uninit_port; - } - /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - windows = devm_kcalloc(dev, ep->max_regions, - sizeof(struct pci_epc_mem_window), GFP_KERNEL); - if (!windows) { - err = -ENOMEM; - goto err_uninit_port; - } - for (i = 0; i < ep->max_regions; i++) { - windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i); - windows[i].size = SZ_1M; - windows[i].page_size = SZ_1M; - } - err = pci_epc_multi_mem_init(epc, windows, ep->max_regions); - devm_kfree(dev, windows); - - if (err < 0) { - dev_err(dev, "failed to initialize the memory space\n"); - goto err_uninit_port; - } - - ep->irq_cpu_addr = pci_epc_mem_alloc_addr(epc, &ep->irq_phys_addr, - SZ_1M); - if (!ep->irq_cpu_addr) { - dev_err(dev, "failed to reserve memory space for MSI\n"); - err = -ENOMEM; - goto err_epc_mem_exit; - } - - ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; - /* * MSI-X is not supported but the controller still advertises the MSI-X * capability by default, which can lead to the Root Complex side @@ -646,10 +663,8 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) pci_epc_init_notify(epc); return 0; -err_epc_mem_exit: - pci_epc_mem_exit(epc); -err_uninit_port: - rockchip_pcie_deinit_phys(rockchip); +err_exit_ob_mem: + rockchip_pcie_ep_exit_ob_mem(ep); err_disable_clocks: rockchip_pcie_disable_clocks(rockchip); return err; -- 2.51.0 From 8efda8aebeed59bab984489407a94008f65f4d1e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:44 +0900 Subject: [PATCH 07/16] PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() MSI-X hiding MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Move the code in rockchip_pcie_ep_probe() to hide the MSI-X capability to its own function, rockchip_pcie_ep_hide_broken_msix_cap(). No functional changes. Link: https://lore.kernel.org/r/20241017015849.190271-10-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 54 +++++++++++++---------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index d497f880eb2c..a53ff16b1661 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -585,6 +585,34 @@ static void rockchip_pcie_ep_exit_ob_mem(struct rockchip_pcie_ep *ep) pci_epc_mem_exit(ep->epc); } +static void rockchip_pcie_ep_hide_broken_msix_cap(struct rockchip_pcie *rockchip) +{ + u32 cfg_msi, cfg_msix_cp; + + /* + * MSI-X is not supported but the controller still advertises the MSI-X + * capability by default, which can lead to the Root Complex side + * allocating MSI-X vectors which cannot be used. Avoid this by skipping + * the MSI-X capability entry in the PCIe capabilities linked-list: get + * the next pointer from the MSI-X entry and set that in the MSI + * capability entry (which is the previous entry). This way the MSI-X + * entry is skipped (left out of the linked-list) and not advertised. + */ + cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); + + cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK; + + cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + + ROCKCHIP_PCIE_EP_MSIX_CAP_REG) & + ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK; + + cfg_msi |= cfg_msix_cp; + + rockchip_pcie_write(rockchip, cfg_msi, + PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); +} + static int rockchip_pcie_ep_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -592,7 +620,6 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) struct rockchip_pcie *rockchip; struct pci_epc *epc; int err; - u32 cfg_msi, cfg_msix_cp; ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL); if (!ep) @@ -627,6 +654,8 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) if (err) goto err_disable_clocks; + rockchip_pcie_ep_hide_broken_msix_cap(rockchip); + /* Establish the link automatically */ rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, PCIE_CLIENT_CONFIG); @@ -634,29 +663,6 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - /* - * MSI-X is not supported but the controller still advertises the MSI-X - * capability by default, which can lead to the Root Complex side - * allocating MSI-X vectors which cannot be used. Avoid this by skipping - * the MSI-X capability entry in the PCIe capabilities linked-list: get - * the next pointer from the MSI-X entry and set that in the MSI - * capability entry (which is the previous entry). This way the MSI-X - * entry is skipped (left out of the linked-list) and not advertised. - */ - cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + - ROCKCHIP_PCIE_EP_MSI_CTRL_REG); - - cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK; - - cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + - ROCKCHIP_PCIE_EP_MSIX_CAP_REG) & - ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK; - - cfg_msi |= cfg_msix_cp; - - rockchip_pcie_write(rockchip, cfg_msi, - PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); - rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); -- 2.51.0 From 091022f5f94513e5f11aa9e2f1cd9fdb4d00f83c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:45 +0900 Subject: [PATCH 08/16] PCI: rockchip-ep: Refactor endpoint link training enable MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The function rockchip_pcie_init_port() enables link training for a controller configured in EP mode. Enabling link training is again done in rockchip_pcie_ep_probe() after that function executed rockchip_pcie_init_port(). Enabling link training only needs to be done once, and doing so at the probe stage before the controller is actually started by the user serves no purpose. Refactor this by removing the link training enablement from both rockchip_pcie_init_port() and rockchip_pcie_ep_probe() and moving it to the endpoint start operation defined with rockchip_pcie_ep_start(). Enabling the controller configuration using the PCIE_CLIENT_CONF_ENABLE bit in the same PCIE_CLIENT_CONFIG register is also moved to rockchip_pcie_ep_start() and both the controller configuration and link training enable bits are set with a single call to rockchip_pcie_write(). Link: https://lore.kernel.org/r/20241017015849.190271-11-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 13 ++++++------- drivers/pci/controller/pcie-rockchip.c | 5 +++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index a53ff16b1661..6c4169b7930e 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -467,6 +467,12 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) rockchip_pcie_write(rockchip, cfg, PCIE_CORE_PHY_FUNC_CFG); + /* Enable configuration and start link training */ + rockchip_pcie_write(rockchip, + PCIE_CLIENT_LINK_TRAIN_ENABLE | + PCIE_CLIENT_CONF_ENABLE, + PCIE_CLIENT_CONFIG); + return 0; } @@ -656,16 +662,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip_pcie_ep_hide_broken_msix_cap(rockchip); - /* Establish the link automatically */ - rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, - PCIE_CLIENT_CONFIG); - /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, - PCIE_CLIENT_CONFIG); - pci_epc_init_notify(epc); return 0; diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index c07d7129f1c7..154e78819e6e 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -244,11 +244,12 @@ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_write(rockchip, PCIE_CLIENT_GEN_SEL_1, PCIE_CLIENT_CONFIG); - regs = PCIE_CLIENT_LINK_TRAIN_ENABLE | PCIE_CLIENT_ARI_ENABLE | + regs = PCIE_CLIENT_ARI_ENABLE | PCIE_CLIENT_CONF_LANE_NUM(rockchip->lanes); if (rockchip->is_rc) - regs |= PCIE_CLIENT_CONF_ENABLE | PCIE_CLIENT_MODE_RC; + regs |= PCIE_CLIENT_LINK_TRAIN_ENABLE | + PCIE_CLIENT_CONF_ENABLE | PCIE_CLIENT_MODE_RC; else regs |= PCIE_CLIENT_CONF_DISABLE | PCIE_CLIENT_MODE_EP; -- 2.51.0 From 00080d0887df8c197c5b89e38215c14980eae544 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:46 +0900 Subject: [PATCH 09/16] PCI: rockship-ep: Implement the pci_epc_ops::stop_link() operation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Define the EPC operation ->stop() for the Rockchip endpoint driver with the function rockchip_pcie_ep_stop(). This function disables link training and the controller configuration, as the reverse to what the start operation defined with rockchip_pcie_ep_start() does. Link: https://lore.kernel.org/r/20241017015849.190271-12-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 13 +++++++++++++ drivers/pci/controller/pcie-rockchip.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 6c4169b7930e..13755e4ed59d 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -476,6 +476,18 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) return 0; } +static void rockchip_pcie_ep_stop(struct pci_epc *epc) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + + /* Stop link training and disable configuration */ + rockchip_pcie_write(rockchip, + PCIE_CLIENT_CONF_DISABLE | + PCIE_CLIENT_LINK_TRAIN_DISABLE, + PCIE_CLIENT_CONFIG); +} + static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = false, .msi_capable = true, @@ -500,6 +512,7 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .get_msi = rockchip_pcie_ep_get_msi, .raise_irq = rockchip_pcie_ep_raise_irq, .start = rockchip_pcie_ep_start, + .stop = rockchip_pcie_ep_stop, .get_features = rockchip_pcie_ep_get_features, }; diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 30398156095f..0263f158ee8d 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -32,6 +32,7 @@ #define PCIE_CLIENT_CONF_ENABLE HIWORD_UPDATE_BIT(0x0001) #define PCIE_CLIENT_CONF_DISABLE HIWORD_UPDATE(0x0001, 0) #define PCIE_CLIENT_LINK_TRAIN_ENABLE HIWORD_UPDATE_BIT(0x0002) +#define PCIE_CLIENT_LINK_TRAIN_DISABLE HIWORD_UPDATE(0x0002, 0) #define PCIE_CLIENT_ARI_ENABLE HIWORD_UPDATE_BIT(0x0008) #define PCIE_CLIENT_CONF_LANE_NUM(x) HIWORD_UPDATE(0x0030, ENCODE_LANES(x)) #define PCIE_CLIENT_MODE_RC HIWORD_UPDATE_BIT(0x0040) -- 2.51.0 From bd6e61df4b2e69985daa312ce28b6af629b30870 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:47 +0900 Subject: [PATCH 10/16] PCI: rockchip-ep: Improve link training MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The Rockchip RK3399 TRM V1.3 Part2, Section 17.5.8.1.2, step 7, describes the endpoint mode link training process clearly and states that: Insure link training completion and success by observing link_st field in PCIe Client BASIC_STATUS1 register change to 2'b11. If both side support PCIe Gen2 speed, re-train can be Initiated by asserting the Retrain Link field in Link Control and Status Register. The software should insure the BASIC_STATUS0[negotiated_speed] changes to "1", that indicates re-train to Gen2 successfully. This procedure is very similar to what is done for the root-port mode in rockchip_pcie_host_init_port(). Implement this link training procedure for the endpoint mode as well. Given that the RK3399 SoC does not have an interrupt signaling link status changes, training is implemented as a delayed work which is rescheduled until the link training completes or the endpoint controller is stopped. The link training work is first scheduled in rockchip_pcie_ep_start() when the endpoint function is started. Link training completion is signaled to the function using pci_epc_linkup(). Accordingly, the linkup_notifier field of the Rockchip pci_epc_features structure is changed to true. Link: https://lore.kernel.org/r/20241017015849.190271-13-dlemoal@kernel.org Signed-off-by: Damien Le Moal [kwilczynski: update log messages to make them consistent] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 82 ++++++++++++++++++++++- drivers/pci/controller/pcie-rockchip.h | 11 +++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 13755e4ed59d..b4d405e4c954 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -10,12 +10,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include "pcie-rockchip.h" @@ -48,6 +50,7 @@ struct rockchip_pcie_ep { u64 irq_pci_addr; u8 irq_pci_fn; u8 irq_pending; + struct delayed_work link_training; }; static void rockchip_pcie_clear_ep_ob_atu(struct rockchip_pcie *rockchip, @@ -473,6 +476,8 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); + schedule_delayed_work(&ep->link_training, 0); + return 0; } @@ -481,6 +486,8 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; + cancel_delayed_work_sync(&ep->link_training); + /* Stop link training and disable configuration */ rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_DISABLE | @@ -488,8 +495,80 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) PCIE_CLIENT_CONFIG); } +static void rockchip_pcie_ep_retrain_link(struct rockchip_pcie *rockchip) +{ + u32 status; + + status = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_LCS); + status |= PCI_EXP_LNKCTL_RL; + rockchip_pcie_write(rockchip, status, PCIE_EP_CONFIG_LCS); +} + +static bool rockchip_pcie_ep_link_up(struct rockchip_pcie *rockchip) +{ + u32 val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS1); + + return PCIE_LINK_UP(val); +} + +static void rockchip_pcie_ep_link_training(struct work_struct *work) +{ + struct rockchip_pcie_ep *ep = + container_of(work, struct rockchip_pcie_ep, link_training.work); + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + u32 val; + int ret; + + /* Enable Gen1 training and wait for its completion */ + ret = readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL, + val, PCIE_LINK_TRAINING_DONE(val), 50, + LINK_TRAIN_TIMEOUT); + if (ret) + goto again; + + /* Make sure that the link is up */ + ret = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1, + val, PCIE_LINK_UP(val), 50, + LINK_TRAIN_TIMEOUT); + if (ret) + goto again; + + /* + * Check the current speed: if gen2 speed was requested and we are not + * at gen2 speed yet, retrain again for gen2. + */ + val = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL); + if (!PCIE_LINK_IS_GEN2(val) && rockchip->link_gen == 2) { + /* Enable retrain for gen2 */ + rockchip_pcie_ep_retrain_link(rockchip); + readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL, + val, PCIE_LINK_IS_GEN2(val), 50, + LINK_TRAIN_TIMEOUT); + } + + /* Check again that the link is up */ + if (!rockchip_pcie_ep_link_up(rockchip)) + goto again; + + val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS0); + dev_info(dev, + "link up (negotiated speed: %sGT/s, width: x%lu)\n", + (val & PCIE_CLIENT_NEG_LINK_SPEED) ? "5" : "2.5", + ((val & PCIE_CLIENT_NEG_LINK_WIDTH_MASK) >> + PCIE_CLIENT_NEG_LINK_WIDTH_SHIFT) << 1); + + /* Notify the function */ + pci_epc_linkup(ep->epc); + + return; + +again: + schedule_delayed_work(&ep->link_training, msecs_to_jiffies(5)); +} + static const struct pci_epc_features rockchip_pcie_epc_features = { - .linkup_notifier = false, + .linkup_notifier = true, .msi_capable = true, .msix_capable = false, .align = ROCKCHIP_PCIE_AT_SIZE_ALIGN, @@ -647,6 +726,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip = &ep->rockchip; rockchip->is_rc = false; rockchip->dev = dev; + INIT_DELAYED_WORK(&ep->link_training, rockchip_pcie_ep_link_training); epc = devm_pci_epc_create(dev, &rockchip_pcie_epc_ops); if (IS_ERR(epc)) { diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 0263f158ee8d..24796176f658 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -26,6 +26,7 @@ #define MAX_LANE_NUM 4 #define MAX_REGION_LIMIT 32 #define MIN_EP_APERTURE 28 +#define LINK_TRAIN_TIMEOUT (500 * USEC_PER_MSEC) #define PCIE_CLIENT_BASE 0x0 #define PCIE_CLIENT_CONFIG (PCIE_CLIENT_BASE + 0x00) @@ -50,6 +51,10 @@ #define PCIE_CLIENT_DEBUG_LTSSM_MASK GENMASK(5, 0) #define PCIE_CLIENT_DEBUG_LTSSM_L1 0x18 #define PCIE_CLIENT_DEBUG_LTSSM_L2 0x19 +#define PCIE_CLIENT_BASIC_STATUS0 (PCIE_CLIENT_BASE + 0x44) +#define PCIE_CLIENT_NEG_LINK_WIDTH_MASK GENMASK(7, 6) +#define PCIE_CLIENT_NEG_LINK_WIDTH_SHIFT 6 +#define PCIE_CLIENT_NEG_LINK_SPEED BIT(5) #define PCIE_CLIENT_BASIC_STATUS1 (PCIE_CLIENT_BASE + 0x48) #define PCIE_CLIENT_LINK_STATUS_UP 0x00300000 #define PCIE_CLIENT_LINK_STATUS_MASK 0x00300000 @@ -87,6 +92,8 @@ #define PCIE_CORE_CTRL_MGMT_BASE 0x900000 #define PCIE_CORE_CTRL (PCIE_CORE_CTRL_MGMT_BASE + 0x000) +#define PCIE_CORE_PL_CONF_LS_MASK 0x00000001 +#define PCIE_CORE_PL_CONF_LS_READY 0x00000001 #define PCIE_CORE_PL_CONF_SPEED_5G 0x00000008 #define PCIE_CORE_PL_CONF_SPEED_MASK 0x00000018 #define PCIE_CORE_PL_CONF_LANE_MASK 0x00000006 @@ -144,6 +151,7 @@ #define PCIE_RC_CONFIG_BASE 0xa00000 #define PCIE_EP_CONFIG_BASE 0xa00000 #define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00) +#define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) #define PCIE_RC_CONFIG_DCR_CSPL_SHIFT 18 @@ -155,6 +163,7 @@ #define PCIE_RC_CONFIG_LINK_CAP (PCIE_RC_CONFIG_BASE + 0xcc) #define PCIE_RC_CONFIG_LINK_CAP_L0S BIT(10) #define PCIE_RC_CONFIG_LCS (PCIE_RC_CONFIG_BASE + 0xd0) +#define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) #define PCIE_RC_CONFIG_L1_SUBSTATE_CTRL2 (PCIE_RC_CONFIG_BASE + 0x90c) #define PCIE_RC_CONFIG_THP_CAP (PCIE_RC_CONFIG_BASE + 0x274) #define PCIE_RC_CONFIG_THP_CAP_NEXT_MASK GENMASK(31, 20) @@ -192,6 +201,8 @@ #define ROCKCHIP_VENDOR_ID 0x1d87 #define PCIE_LINK_IS_L2(x) \ (((x) & PCIE_CLIENT_DEBUG_LTSSM_MASK) == PCIE_CLIENT_DEBUG_LTSSM_L2) +#define PCIE_LINK_TRAINING_DONE(x) \ + (((x) & PCIE_CORE_PL_CONF_LS_MASK) == PCIE_CORE_PL_CONF_LS_READY) #define PCIE_LINK_UP(x) \ (((x) & PCIE_CLIENT_LINK_STATUS_MASK) == PCIE_CLIENT_LINK_STATUS_UP) #define PCIE_LINK_IS_GEN2(x) \ -- 2.51.0 From a7137cbf6bd53a9f9c40c64fc8b12b88289b3d4a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:48 +0900 Subject: [PATCH 11/16] PCI: rockchip-ep: Handle PERST# signal in EP mode MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently, the Rockchip PCIe endpoint controller driver does not handle the PERST# signal, which prevents detecting when link training should actually be started or if the host resets the device. This however can be supported using the controller reset_gpios property set as an input GPIO for endpoint mode. Modify the Rockchip PCI endpoint controller driver to get the reset_gpio and its associated interrupt which is serviced using a threaded IRQ with the function rockchip_pcie_ep_perst_irq_thread() as handler. This handler function notifies a link down event corresponding to the RC side asserting the PERST# signal using pci_epc_linkdown() when the gpio is high. Once the gpio value goes down, corresponding to the RC de-asserting the PERST# signal, link training is started. The polarity of the gpio interrupt trigger is changed from high to low after the RC asserted PERST#, and conversely changed from low to high after the RC de-asserts PERST#. Also, given that the host mode controller and the endpoint mode controller use two different property names for the same PERST# signal (ep_gpios property and reset_gpios property respectively), for clarity, rename the ep_gpio field of struct rockchip_pcie to perst_gpio. Link: https://lore.kernel.org/r/20241017015849.190271-14-dlemoal@kernel.org Signed-off-by: Damien Le Moal [kwilczynski: make log messages consistent, add missing include] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 133 +++++++++++++++++++- drivers/pci/controller/pcie-rockchip-host.c | 4 +- drivers/pci/controller/pcie-rockchip.c | 16 +-- drivers/pci/controller/pcie-rockchip.h | 2 +- 4 files changed, 141 insertions(+), 14 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index b4d405e4c954..1064b7b06cef 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -10,8 +10,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -50,6 +52,9 @@ struct rockchip_pcie_ep { u64 irq_pci_addr; u8 irq_pci_fn; u8 irq_pending; + int perst_irq; + bool perst_asserted; + bool link_up; struct delayed_work link_training; }; @@ -470,13 +475,17 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) rockchip_pcie_write(rockchip, cfg, PCIE_CORE_PHY_FUNC_CFG); + if (rockchip->perst_gpio) + enable_irq(ep->perst_irq); + /* Enable configuration and start link training */ rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE | PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); - schedule_delayed_work(&ep->link_training, 0); + if (!rockchip->perst_gpio) + schedule_delayed_work(&ep->link_training, 0); return 0; } @@ -486,6 +495,11 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; + if (rockchip->perst_gpio) { + ep->perst_asserted = true; + disable_irq(ep->perst_irq); + } + cancel_delayed_work_sync(&ep->link_training); /* Stop link training and disable configuration */ @@ -551,6 +565,13 @@ static void rockchip_pcie_ep_link_training(struct work_struct *work) if (!rockchip_pcie_ep_link_up(rockchip)) goto again; + /* + * If PERST# was asserted while polling the link, do not notify + * the function. + */ + if (ep->perst_asserted) + return; + val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS0); dev_info(dev, "link up (negotiated speed: %sGT/s, width: x%lu)\n", @@ -560,6 +581,7 @@ static void rockchip_pcie_ep_link_training(struct work_struct *work) /* Notify the function */ pci_epc_linkup(ep->epc); + ep->link_up = true; return; @@ -567,6 +589,103 @@ again: schedule_delayed_work(&ep->link_training, msecs_to_jiffies(5)); } +static void rockchip_pcie_ep_perst_assert(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + + dev_dbg(rockchip->dev, "PERST# asserted, link down\n"); + + if (ep->perst_asserted) + return; + + ep->perst_asserted = true; + + cancel_delayed_work_sync(&ep->link_training); + + if (ep->link_up) { + pci_epc_linkdown(ep->epc); + ep->link_up = false; + } +} + +static void rockchip_pcie_ep_perst_deassert(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + + dev_dbg(rockchip->dev, "PERST# de-asserted, starting link training\n"); + + if (!ep->perst_asserted) + return; + + ep->perst_asserted = false; + + /* Enable link re-training */ + rockchip_pcie_ep_retrain_link(rockchip); + + /* Start link training */ + schedule_delayed_work(&ep->link_training, 0); +} + +static irqreturn_t rockchip_pcie_ep_perst_irq_thread(int irq, void *data) +{ + struct pci_epc *epc = data; + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + u32 perst = gpiod_get_value(rockchip->perst_gpio); + + if (perst) + rockchip_pcie_ep_perst_assert(ep); + else + rockchip_pcie_ep_perst_deassert(ep); + + irq_set_irq_type(ep->perst_irq, + (perst ? IRQF_TRIGGER_HIGH : IRQF_TRIGGER_LOW)); + + return IRQ_HANDLED; +} + +static int rockchip_pcie_ep_setup_irq(struct pci_epc *epc) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + int ret; + + if (!rockchip->perst_gpio) + return 0; + + /* PCIe reset interrupt */ + ep->perst_irq = gpiod_to_irq(rockchip->perst_gpio); + if (ep->perst_irq < 0) { + dev_err(dev, + "failed to get IRQ for PERST# GPIO: %d\n", + ep->perst_irq); + + return ep->perst_irq; + } + + /* + * The perst_gpio is active low, so when it is inactive on start, it + * is high and will trigger the perst_irq handler. So treat this initial + * IRQ as a dummy one by faking the host asserting PERST#. + */ + ep->perst_asserted = true; + irq_set_status_flags(ep->perst_irq, IRQ_NOAUTOEN); + ret = devm_request_threaded_irq(dev, ep->perst_irq, NULL, + rockchip_pcie_ep_perst_irq_thread, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "pcie-ep-perst", epc); + if (ret) { + dev_err(dev, + "failed to request IRQ for PERST# GPIO: %d\n", + ret); + + return ret; + } + + return 0; +} + static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = true, .msi_capable = true, @@ -730,7 +849,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) epc = devm_pci_epc_create(dev, &rockchip_pcie_epc_ops); if (IS_ERR(epc)) { - dev_err(dev, "failed to create epc device\n"); + dev_err(dev, "failed to create EPC device\n"); return PTR_ERR(epc); } @@ -760,11 +879,17 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) pci_epc_init_notify(epc); + err = rockchip_pcie_ep_setup_irq(epc); + if (err < 0) + goto err_uninit_port; + return 0; -err_exit_ob_mem: - rockchip_pcie_ep_exit_ob_mem(ep); +err_uninit_port: + rockchip_pcie_deinit_phys(rockchip); err_disable_clocks: rockchip_pcie_disable_clocks(rockchip); +err_exit_ob_mem: + rockchip_pcie_ep_exit_ob_mem(ep); return err; } diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index cbec71114825..7471d9fd18bc 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -294,7 +294,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) int err, i = MAX_LANE_NUM; u32 status; - gpiod_set_value_cansleep(rockchip->ep_gpio, 0); + gpiod_set_value_cansleep(rockchip->perst_gpio, 0); err = rockchip_pcie_init_port(rockchip); if (err) @@ -323,7 +323,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) PCIE_CLIENT_CONFIG); msleep(PCIE_T_PVPERL_MS); - gpiod_set_value_cansleep(rockchip->ep_gpio, 1); + gpiod_set_value_cansleep(rockchip->perst_gpio, 1); msleep(PCIE_T_RRS_READY_MS); diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index 154e78819e6e..b9ade7632e11 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -119,13 +119,15 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) return PTR_ERR(rockchip->aclk_rst); } - if (rockchip->is_rc) { - rockchip->ep_gpio = devm_gpiod_get_optional(dev, "ep", - GPIOD_OUT_LOW); - if (IS_ERR(rockchip->ep_gpio)) - return dev_err_probe(dev, PTR_ERR(rockchip->ep_gpio), - "failed to get ep GPIO\n"); - } + if (rockchip->is_rc) + rockchip->perst_gpio = devm_gpiod_get_optional(dev, "ep", + GPIOD_OUT_LOW); + else + rockchip->perst_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_IN); + if (IS_ERR(rockchip->perst_gpio)) + return dev_err_probe(dev, PTR_ERR(rockchip->perst_gpio), + "failed to get PERST# GPIO\n"); rockchip->aclk_pcie = devm_clk_get(dev, "aclk"); if (IS_ERR(rockchip->aclk_pcie)) { diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 24796176f658..a51b087ce878 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -329,7 +329,7 @@ struct rockchip_pcie { struct regulator *vpcie3v3; /* 3.3V power supply */ struct regulator *vpcie1v8; /* 1.8V power supply */ struct regulator *vpcie0v9; /* 0.9V power supply */ - struct gpio_desc *ep_gpio; + struct gpio_desc *perst_gpio; u32 lanes; u8 lanes_map; int link_gen; -- 2.51.0 From 43a43faf5376114161aa684834d24e06da596287 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 22 Nov 2024 11:18:25 -0800 Subject: [PATCH 12/16] futex: improve user space accesses Josh Poimboeuf reports that he got a "will-it-scale.per_process_ops 1.9% improvement" report for his patch that changed __get_user() to use pointer masking instead of the explicit speculation barrier. However, that patch doesn't actually work in the general case, because some (very bad) architecture-specific code actually depends on __get_user() also working on kernel addresses. A profile showed that the offending __get_user() was the futex code, which really should be fixed up to not use that horrid legacy case. Rewrite futex_get_value_locked() to use the modern user acccess helpers, and inline it so that the compiler not only avoids the function call for a few instructions, but can do CSE on the address masking. It also turns out the x86 futex functions have unnecessary barriers in other places, so let's fix those up too. Link: https://lore.kernel.org/all/20241115230653.hfvzyf3aqqntgp63@jpoimboe/ Reported-by: Josh Poimboeuf Signed-off-by: Linus Torvalds --- arch/x86/include/asm/futex.h | 8 +++-- kernel/futex/core.c | 22 -------------- kernel/futex/futex.h | 59 ++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 99d345b686fa..6e2458088800 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -48,7 +48,9 @@ do { \ static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - if (!user_access_begin(uaddr, sizeof(u32))) + if (can_do_masked_user_access()) + uaddr = masked_user_access_begin(uaddr); + else if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; switch (op) { @@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, { int ret = 0; - if (!user_access_begin(uaddr, sizeof(u32))) + if (can_do_masked_user_access()) + uaddr = masked_user_access_begin(uaddr); + else if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; asm volatile("\n" "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 6de57246760e..ebdd76b4ecbb 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key * return NULL; } -int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) -{ - int ret; - - pagefault_disable(); - ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); - pagefault_enable(); - - return ret; -} - -int futex_get_value_locked(u32 *dest, u32 __user *from) -{ - int ret; - - pagefault_disable(); - ret = __get_user(*dest, from); - pagefault_enable(); - - return ret ? -EFAULT : 0; -} - /** * wait_for_owner_exiting - Block until the owner has exited * @ret: owner's current futex lock status diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 8b195d06f4e8..618ce1fe870e 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef CONFIG_PREEMPT_RT #include @@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q); extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q); extern int fault_in_user_writeable(u32 __user *uaddr); -extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval); -extern int futex_get_value_locked(u32 *dest, u32 __user *from); extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key); +static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) +{ + int ret; + + pagefault_disable(); + ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); + pagefault_enable(); + + return ret; +} + +/* + * This does a plain atomic user space read, and the user pointer has + * already been verified earlier by get_futex_key() to be both aligned + * and actually in user space, just like futex_atomic_cmpxchg_inatomic(). + * + * We still want to avoid any speculation, and while __get_user() is + * the traditional model for this, it's actually slower than doing + * this manually these days. + * + * We could just have a per-architecture special function for it, + * the same way we do futex_atomic_cmpxchg_inatomic(), but rather + * than force everybody to do that, write it out long-hand using + * the low-level user-access infrastructure. + * + * This looks a bit overkill, but generally just results in a couple + * of instructions. + */ +static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from) +{ + u32 val; + + if (can_do_masked_user_access()) + from = masked_user_access_begin(from); + else if (!user_read_access_begin(from, sizeof(*from))) + return -EFAULT; + unsafe_get_user(val, from, Efault); + user_access_end(); + *dest = val; + return 0; +Efault: + user_access_end(); + return -EFAULT; +} + +static inline int futex_get_value_locked(u32 *dest, u32 __user *from) +{ + int ret; + + pagefault_disable(); + ret = futex_read_inatomic(dest, from); + pagefault_enable(); + + return ret; +} + extern void __futex_unqueue(struct futex_q *q); extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); extern int futex_unqueue(struct futex_q *q); -- 2.51.0 From 573f45a9f9a47fed4c7957609689b772121b33d7 Mon Sep 17 00:00:00 2001 From: David Laight Date: Sun, 24 Nov 2024 15:39:00 +0000 Subject: [PATCH 13/16] x86: fix off-by-one in access_ok() When the size isn't a small constant, __access_ok() will call valid_user_address() with the address after the last byte of the user buffer. It is valid for a buffer to end with the last valid user address so valid_user_address() must allow accesses to the base of the guard page. [ This introduces an off-by-one in the other direction for the plain non-sized accesses, but since we have that guard region that is a whole page, those checks "allowing" accesses to that guard region don't really matter. The access will fault anyway, whether to the guard page or if the address has been masked to all ones - Linus ] Fixes: 86e6b1547b3d0 ("x86: fix user address masking non-canonical speculation issue") Signed-off-by: David Laight Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06a516f6795b..ca327cfa42ae 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2389,12 +2389,12 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1; + unsigned long USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support if (cpu_feature_enabled(X86_FEATURE_LAM)) - USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1; + USER_PTR_MAX = (1ul << 63) - PAGE_SIZE; */ runtime_const_init(ptr, USER_PTR_MAX); -- 2.51.0 From 222974c6ec9d901f7ad13bbe6e505ec1f1d822d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Nov 2024 18:45:33 -0800 Subject: [PATCH 14/16] iommu: remove stale declaration left over by a merge conflict The merge commit ae3325f752ef ("Merge branches 'arm/smmu', 'mediatek', 's390', 'ti/omap', 'riscv' and 'core' into next") left a stale declaration of 'iommu_present()' even though the 'core' branch that was merged had removed the function (and the declaration). Remove it for real. Reported-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Joerg Roedel Signed-off-by: Linus Torvalds --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d6aaaec3caf4..301e97d745c1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -841,7 +841,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags); -- 2.51.0 From 1dc707e647bc919834eff9636c8d00b78c782545 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 26 Nov 2024 17:54:58 -0800 Subject: [PATCH 15/16] rust: fix up formatting after merge When I merged the rust 'use' imports, I didn't realize that there's an offical preferred idiomatic format - so while it all worked fine, it doesn't match what 'make rustfmt' wants to make it. Fix it up appropriately. Suggested-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- rust/kernel/task.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 7a76be583126..07bc22a7645c 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -6,11 +6,15 @@ use crate::{ bindings, + ffi::{c_int, c_long, c_uint}, pid_namespace::PidNamespace, types::{ARef, NotThreadSafe, Opaque}, }; -use crate::ffi::{c_int, c_long, c_uint}; -use core::{cmp::{Eq, PartialEq},ops::Deref, ptr}; +use core::{ + cmp::{Eq, PartialEq}, + ops::Deref, + ptr, +}; /// A sentinel value used for infinite timeouts. pub const MAX_SCHEDULE_TIMEOUT: c_long = c_long::MAX; -- 2.51.0 From 81de291d86b704de1809cfb06672902d003cf3a3 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:28 +0800 Subject: [PATCH 16/16] of: dynamic: Add of_changeset_update_prop_string Add a helper function to add string property updates to an OF changeset. This is similar to of_changeset_add_prop_string(), but instead of adding the property (and failing if it exists), it will update the property. This shall be used later in the DT hardware prober. Signed-off-by: Chen-Yu Tsai Reviewed-by: Rob Herring (Arm) Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- drivers/of/dynamic.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 4 ++++ 2 files changed, 48 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index d45a8df61380..0aba760f7577 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -1072,3 +1072,47 @@ int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, return of_changeset_add_prop_helper(ocs, np, &prop); } EXPORT_SYMBOL_GPL(of_changeset_add_prop_bool); + +static int of_changeset_update_prop_helper(struct of_changeset *ocs, + struct device_node *np, + const struct property *pp) +{ + struct property *new_pp; + int ret; + + new_pp = __of_prop_dup(pp, GFP_KERNEL); + if (!new_pp) + return -ENOMEM; + + ret = of_changeset_update_property(ocs, np, new_pp); + if (ret) + __of_prop_free(new_pp); + + return ret; +} + +/** + * of_changeset_update_prop_string - Add a string property update to a changeset + * + * @ocs: changeset pointer + * @np: device node pointer + * @prop_name: name of the property to be updated + * @str: pointer to null terminated string + * + * Create a string property to be updated and add it to a changeset. + * + * Return: 0 on success, a negative error value in case of an error. + */ +int of_changeset_update_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str) +{ + struct property prop = { + .name = (char *)prop_name, + .length = strlen(str) + 1, + .value = (void *)str, + }; + + return of_changeset_update_prop_helper(ocs, np, &prop); +} +EXPORT_SYMBOL_GPL(of_changeset_update_prop_string); diff --git a/include/linux/of.h b/include/linux/of.h index 086a60f3b8a6..d0307e3b093d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1651,6 +1651,10 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } +int of_changeset_update_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str); + int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, const char *prop_name); -- 2.51.0