From bec8a3712943282fcac10647905c021335e27c2b Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 12 Feb 2025 18:20:32 +0000 Subject: [PATCH 01/16] irqchip/renesas-rzg2l: Remove pm_put label No need to keep label `pm_put`, as it's only used once. Call pm_runtime_put() directly from the error path. Signed-off-by: Fabrizio Castro Signed-off-by: Thomas Gleixner Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/all/20250212182034.366167-5-fabrizio.castro.jz@renesas.com --- drivers/irqchip/irq-renesas-rzg2l.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index c024023a1832..0f325ceb0f53 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -586,9 +586,9 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * node, &rzg2l_irqc_domain_ops, rzg2l_irqc_data); if (!irq_domain) { + pm_runtime_put(dev); dev_err(dev, "failed to add irq domain\n"); - ret = -ENOMEM; - goto pm_put; + return -ENOMEM; } register_syscore_ops(&rzg2l_irqc_syscore_ops); @@ -605,11 +605,6 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * dev = NULL; return 0; - -pm_put: - pm_runtime_put(dev); - - return ret; } static int __init rzg2l_irqc_init(struct device_node *node, -- 2.51.0 From 4bd0317ce63c26c8a1b6e96c9be0badac749c6f7 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 12 Feb 2025 18:20:33 +0000 Subject: [PATCH 02/16] irqchip/renesas-rzg2l: Switch to using dev_err_probe() Make use of dev_err_probe() to simplify rzg2l_irqc_common_init(). Signed-off-by: Fabrizio Castro Signed-off-by: Thomas Gleixner Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/all/20250212182034.366167-6-fabrizio.castro.jz@renesas.com --- drivers/irqchip/irq-renesas-rzg2l.c | 31 ++++++++++------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 0f325ceb0f53..8714e7fdbbda 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -541,10 +541,8 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * return -ENODEV; parent_domain = irq_find_host(parent); - if (!parent_domain) { - dev_err(dev, "cannot find parent domain\n"); - return -ENODEV; - } + if (!parent_domain) + return dev_err_probe(dev, -ENODEV, "cannot find parent domain\n"); rzg2l_irqc_data = devm_kzalloc(dev, sizeof(*rzg2l_irqc_data), GFP_KERNEL); if (!rzg2l_irqc_data) @@ -557,28 +555,22 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * return PTR_ERR(rzg2l_irqc_data->base); ret = rzg2l_irqc_parse_interrupts(rzg2l_irqc_data, node); - if (ret) { - dev_err(dev, "cannot parse interrupts: %d\n", ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "cannot parse interrupts: %d\n", ret); resetn = devm_reset_control_get_exclusive_deasserted(dev, NULL); if (IS_ERR(resetn)) { - dev_err(dev, "failed to acquire deasserted reset: %d\n", ret); - return PTR_ERR(resetn); + return dev_err_probe(dev, PTR_ERR(resetn), + "failed to acquire deasserted reset: %d\n", ret); } ret = devm_pm_runtime_enable(dev); - if (ret < 0) { - dev_err(dev, "devm_pm_runtime_enable failed: %d\n", ret); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, "devm_pm_runtime_enable failed: %d\n", ret); ret = pm_runtime_resume_and_get(dev); - if (ret < 0) { - dev_err(dev, "pm_runtime_resume_and_get failed: %d\n", ret); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, "pm_runtime_resume_and_get failed: %d\n", ret); raw_spin_lock_init(&rzg2l_irqc_data->lock); @@ -587,8 +579,7 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * rzg2l_irqc_data); if (!irq_domain) { pm_runtime_put(dev); - dev_err(dev, "failed to add irq domain\n"); - return -ENOMEM; + return dev_err_probe(dev, -ENOMEM, "failed to add irq domain\n"); } register_syscore_ops(&rzg2l_irqc_syscore_ops); -- 2.51.0 From 0699e578e27910224ee09a55b824e5d494ce280f Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 12 Feb 2025 18:20:34 +0000 Subject: [PATCH 03/16] irqchip/renesas-rzg2l: Simplify checks in rzg2l_irqc_common_init() Both devm_pm_runtime_enable() and pm_runtime_resume_and_get() return 0 or a negative error code. Simplify the checks done with their respective return values accordingly. Signed-off-by: Fabrizio Castro Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250212182034.366167-7-fabrizio.castro.jz@renesas.com --- drivers/irqchip/irq-renesas-rzg2l.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 8714e7fdbbda..6a2e41f02446 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -565,11 +565,11 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * } ret = devm_pm_runtime_enable(dev); - if (ret < 0) + if (ret) return dev_err_probe(dev, ret, "devm_pm_runtime_enable failed: %d\n", ret); ret = pm_runtime_resume_and_get(dev); - if (ret < 0) + if (ret) return dev_err_probe(dev, ret, "pm_runtime_resume_and_get failed: %d\n", ret); raw_spin_lock_init(&rzg2l_irqc_data->lock); -- 2.51.0 From 999f458c1771354371ba367dd84f55f9a62a4233 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 14:26:47 +0530 Subject: [PATCH 04/16] irqchip/riscv-imsic: Set irq_set_affinity() for IMSIC base The IMSIC driver assigns the IMSIC domain specific imsic_irq_set_affinity() callback to the per device leaf MSI domain. That's a layering violation as it is called with the leaf domain data and not with the IMSIC domain data. This prevents moving the IMSIC driver to the common MSI library which uses the generic msi_domain_set_affinity() callback for device MSI domains. Instead of using imsic_irq_set_affinity() for leaf MSI domains, use imsic_irq_set_affinity() for the non-leaf IMSIC base domain and use irq_chip_set_affinity_parent() for leaf MSI domains. [ tglx: Massaged change log ] Signed-off-by: Andrew Jones Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-2-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-imsic-platform.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index c708780e8760..5d7c30ad8855 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask bool force) { struct imsic_vector *old_vec, *new_vec; - struct irq_data *pd = d->parent_data; - old_vec = irq_data_get_irq_chip_data(pd); + old_vec = irq_data_get_irq_chip_data(d); if (WARN_ON(!old_vec)) return -ENOENT; @@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask return -ENOSPC; /* Point device to the new vector */ - imsic_msi_update_msg(d, new_vec); + imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec); /* Update irq descriptors with the new vector */ - pd->chip_data = new_vec; + d->chip_data = new_vec; - /* Update effective affinity of parent irq data */ - irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu)); + /* Update effective affinity */ + irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu)); /* Move state of the old vector to the new vector */ imsic_vector_move(old_vec, new_vec); @@ -135,6 +134,9 @@ static struct irq_chip imsic_irq_base_chip = { .name = "IMSIC", .irq_mask = imsic_irq_mask, .irq_unmask = imsic_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = imsic_irq_set_affinity, +#endif .irq_retrigger = imsic_irq_retrigger, .irq_compose_msi_msg = imsic_irq_compose_msg, .flags = IRQCHIP_SKIP_SET_WAKE | @@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev, if (WARN_ON_ONCE(domain != real_parent)) return false; #ifdef CONFIG_SMP - info->chip->irq_set_affinity = imsic_irq_set_affinity; + info->chip->irq_set_affinity = irq_chip_set_affinity_parent; #endif break; default: -- 2.51.0 From 1c000dcaad2bef20189f3868207f515ef4b637ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Feb 2025 14:26:48 +0530 Subject: [PATCH 05/16] irqchip/irq-msi-lib: Optionally set default irq_eoi()/irq_ack() msi_lib_init_dev_msi_info() sets the default irq_eoi()/irq_ack() callbacks unconditionally. This is correct for all existing users, but prevents the IMSIC driver to be moved to the MSI library implementation. Introduce chip_flags in struct msi_parent_ops, which instruct the library to selectively set the callbacks depending on the flags, and update all current users to set them. Signed-off-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-3-apatel@ventanamicro.com --- drivers/irqchip/irq-gic-v2m.c | 1 + drivers/irqchip/irq-gic-v3-its-msi-parent.c | 1 + drivers/irqchip/irq-gic-v3-mbi.c | 1 + drivers/irqchip/irq-imx-mu-msi.c | 1 + drivers/irqchip/irq-loongson-pch-msi.c | 1 + drivers/irqchip/irq-msi-lib.c | 11 ++++++----- drivers/irqchip/irq-mvebu-gicp.c | 1 + drivers/irqchip/irq-mvebu-odmi.c | 1 + drivers/irqchip/irq-mvebu-sei.c | 1 + include/linux/msi.h | 11 +++++++++++ 10 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index be35c5349986..1e3476c335ca 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -255,6 +255,7 @@ static void __init gicv2m_teardown(void) static struct msi_parent_ops gicv2m_msi_parent_ops = { .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED, .required_flags = GICV2M_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "GICv2m-", diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c index e150365fbe89..bdb04c808148 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c @@ -203,6 +203,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "ITS-", diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 3fe870f8ee17..3e1d8a1cda5e 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -201,6 +201,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = { .supported_flags = MBI_MSI_FLAGS_SUPPORTED, .required_flags = MBI_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "MBI-", diff --git a/drivers/irqchip/irq-imx-mu-msi.c b/drivers/irqchip/irq-imx-mu-msi.c index 4342a21de1eb..69aacdfc8bef 100644 --- a/drivers/irqchip/irq-imx-mu-msi.c +++ b/drivers/irqchip/irq-imx-mu-msi.c @@ -214,6 +214,7 @@ static void imx_mu_msi_irq_handler(struct irq_desc *desc) static const struct msi_parent_ops imx_mu_msi_parent_ops = { .supported_flags = IMX_MU_MSI_FLAGS_SUPPORTED, .required_flags = IMX_MU_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "MU-MSI-", diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index bd337ecddb40..9c62108b3ad5 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -146,6 +146,7 @@ static const struct irq_domain_ops pch_msi_middle_domain_ops = { static struct msi_parent_ops pch_msi_parent_ops = { .required_flags = PCH_MSI_FLAGS_REQUIRED, .supported_flags = PCH_MSI_FLAGS_SUPPORTED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_mask = MATCH_PCI_MSI, .bus_select_token = DOMAIN_BUS_NEXUS, .prefix = "PCH-", diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index d8e29fc0d406..51464c6257f3 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -28,6 +28,7 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, struct msi_domain_info *info) { const struct msi_parent_ops *pops = real_parent->msi_parent_ops; + struct irq_chip *chip = info->chip; u32 required_flags; /* Parent ops available? */ @@ -92,10 +93,10 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, info->flags |= required_flags; /* Chip updates for all child bus types */ - if (!info->chip->irq_eoi) - info->chip->irq_eoi = irq_chip_eoi_parent; - if (!info->chip->irq_ack) - info->chip->irq_ack = irq_chip_ack_parent; + if (!chip->irq_eoi && (pops->chip_flags & MSI_CHIP_FLAG_SET_EOI)) + chip->irq_eoi = irq_chip_eoi_parent; + if (!chip->irq_ack && (pops->chip_flags & MSI_CHIP_FLAG_SET_ACK)) + chip->irq_ack = irq_chip_ack_parent; /* * The device MSI domain can never have a set affinity callback. It @@ -105,7 +106,7 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, * device MSI domain aside of mask/unmask which is provided e.g. by * PCI/MSI device domains. */ - info->chip->irq_set_affinity = msi_domain_set_affinity; + chip->irq_set_affinity = msi_domain_set_affinity; return true; } EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info); diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index 2b6183919ea4..d67f93f6d750 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -161,6 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = { static const struct msi_parent_ops gicp_msi_parent_ops = { .supported_flags = GICP_MSI_FLAGS_SUPPORTED, .required_flags = GICP_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "GICP-", diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c index ff19bfd258dc..28f7e81df94f 100644 --- a/drivers/irqchip/irq-mvebu-odmi.c +++ b/drivers/irqchip/irq-mvebu-odmi.c @@ -157,6 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = { static const struct msi_parent_ops odmi_msi_parent_ops = { .supported_flags = ODMI_MSI_FLAGS_SUPPORTED, .required_flags = ODMI_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "ODMI-", diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c index 065166ab5dbc..ebd4a9014e8d 100644 --- a/drivers/irqchip/irq-mvebu-sei.c +++ b/drivers/irqchip/irq-mvebu-sei.c @@ -356,6 +356,7 @@ static void mvebu_sei_reset(struct mvebu_sei *sei) static const struct msi_parent_ops sei_msi_parent_ops = { .supported_flags = SEI_MSI_FLAGS_SUPPORTED, .required_flags = SEI_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_mask = MATCH_PLATFORM_MSI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .prefix = "SEI-", diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00e..9abef442c146 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -558,11 +558,21 @@ enum { MSI_FLAG_NO_AFFINITY = (1 << 21), }; +/* + * Flags for msi_parent_ops::chip_flags + */ +enum { + MSI_CHIP_FLAG_SET_EOI = (1 << 0), + MSI_CHIP_FLAG_SET_ACK = (1 << 1), +}; + /** * struct msi_parent_ops - MSI parent domain callbacks and configuration info * * @supported_flags: Required: The supported MSI flags of the parent domain * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @chip_flags: Optional: Select MSI chip callbacks to update with defaults + * in msi_lib_init_dev_msi_info(). * @bus_select_token: Optional: The bus token of the real parent domain for * irq_domain::select() * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for @@ -575,6 +585,7 @@ enum { struct msi_parent_ops { u32 supported_flags; u32 required_flags; + u32 chip_flags; u32 bus_select_token; u32 bus_select_mask; const char *prefix; -- 2.51.0 From fe35ecee8ec8d42b1d24ed70e5b33192294bcef0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Feb 2025 14:26:49 +0530 Subject: [PATCH 06/16] irqchip/riscv-imsic: Move to common MSI library Simplify the leaf MSI domain handling in the RISC-V IMSIC driver by using msi_lib_init_dev_msi_info() and msi_lib_irq_domain_select() provided by the common MSI library. Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Jones Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-4-apatel@ventanamicro.com --- drivers/irqchip/Kconfig | 8 +- drivers/irqchip/irq-riscv-imsic-platform.c | 114 +-------------------- 2 files changed, 6 insertions(+), 116 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4..bc3f12af2dc7 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -589,13 +589,7 @@ config RISCV_IMSIC select IRQ_DOMAIN_HIERARCHY select GENERIC_IRQ_MATRIX_ALLOCATOR select GENERIC_MSI_IRQ - -config RISCV_IMSIC_PCI - bool - depends on RISCV_IMSIC - depends on PCI - depends on PCI_MSI - default RISCV_IMSIC + select IRQ_MSI_LIB config SIFIVE_PLIC bool diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index 5d7c30ad8855..9a5e7b4541f6 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -20,6 +20,7 @@ #include #include +#include "irq-msi-lib.h" #include "irq-riscv-imsic-state.h" static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index, @@ -174,22 +175,6 @@ static void imsic_irq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_parent(domain, virq, nr_irqs); } -static int imsic_irq_domain_select(struct irq_domain *domain, struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token) -{ - const struct msi_parent_ops *ops = domain->msi_parent_ops; - u32 busmask = BIT(bus_token); - - if (fwspec->fwnode != domain->fwnode || fwspec->param_count != 0) - return 0; - - /* Handle pure domain searches */ - if (bus_token == ops->bus_select_token) - return 1; - - return !!(ops->bus_select_mask & busmask); -} - #ifdef CONFIG_GENERIC_IRQ_DEBUGFS static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) @@ -206,110 +191,21 @@ static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d, static const struct irq_domain_ops imsic_base_domain_ops = { .alloc = imsic_irq_domain_alloc, .free = imsic_irq_domain_free, - .select = imsic_irq_domain_select, + .select = msi_lib_irq_domain_select, #ifdef CONFIG_GENERIC_IRQ_DEBUGFS .debug_show = imsic_irq_debug_show, #endif }; -#ifdef CONFIG_RISCV_IMSIC_PCI - -static void imsic_pci_mask_irq(struct irq_data *d) -{ - pci_msi_mask_irq(d); - irq_chip_mask_parent(d); -} - -static void imsic_pci_unmask_irq(struct irq_data *d) -{ - irq_chip_unmask_parent(d); - pci_msi_unmask_irq(d); -} - -#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI) - -#else - -#define MATCH_PCI_MSI 0 - -#endif - -static bool imsic_init_dev_msi_info(struct device *dev, - struct irq_domain *domain, - struct irq_domain *real_parent, - struct msi_domain_info *info) -{ - const struct msi_parent_ops *pops = real_parent->msi_parent_ops; - - /* MSI parent domain specific settings */ - switch (real_parent->bus_token) { - case DOMAIN_BUS_NEXUS: - if (WARN_ON_ONCE(domain != real_parent)) - return false; -#ifdef CONFIG_SMP - info->chip->irq_set_affinity = irq_chip_set_affinity_parent; -#endif - break; - default: - WARN_ON_ONCE(1); - return false; - } - - /* Is the target supported? */ - switch (info->bus_token) { -#ifdef CONFIG_RISCV_IMSIC_PCI - case DOMAIN_BUS_PCI_DEVICE_MSI: - case DOMAIN_BUS_PCI_DEVICE_MSIX: - info->chip->irq_mask = imsic_pci_mask_irq; - info->chip->irq_unmask = imsic_pci_unmask_irq; - break; -#endif - case DOMAIN_BUS_DEVICE_MSI: - /* - * Per-device MSI should never have any MSI feature bits - * set. It's sole purpose is to create a dumb interrupt - * chip which has a device specific irq_write_msi_msg() - * callback. - */ - if (WARN_ON_ONCE(info->flags)) - return false; - - /* Core managed MSI descriptors */ - info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS | - MSI_FLAG_FREE_MSI_DESCS; - break; - case DOMAIN_BUS_WIRED_TO_MSI: - break; - default: - WARN_ON_ONCE(1); - return false; - } - - /* Use hierarchial chip operations re-trigger */ - info->chip->irq_retrigger = irq_chip_retrigger_hierarchy; - - /* - * Mask out the domain specific MSI feature flags which are not - * supported by the real parent. - */ - info->flags &= pops->supported_flags; - - /* Enforce the required flags */ - info->flags |= pops->required_flags; - - return true; -} - -#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) - static const struct msi_parent_ops imsic_msi_parent_ops = { .supported_flags = MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX, .required_flags = MSI_FLAG_USE_DEF_DOM_OPS | - MSI_FLAG_USE_DEF_CHIP_OPS, + MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSI_MASK_PARENT, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, - .init_dev_msi_info = imsic_init_dev_msi_info, + .init_dev_msi_info = msi_lib_init_dev_msi_info, }; int imsic_irqdomain_init(void) -- 2.51.0 From 751dc837dabd275d0ab165fc737c10f80e2e863a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Feb 2025 14:26:50 +0530 Subject: [PATCH 07/16] genirq: Introduce common irq_force_complete_move() implementation CONFIG_GENERIC_PENDING_IRQ requires an architecture specific implementation of irq_force_complete_move() for CPU hotplug. At the moment, only x86 implements this unconditionally, but for RISC-V irq_force_complete_move() is only needed when the RISC-V IMSIC driver is in use and not needed otherwise. To allow runtime configuration of this mechanism, introduce a common irq_force_complete_move() implementation in the interrupt core code, which only invokes the completion function, when a interrupt chip in the hierarchy implements it. Switch X86 over to the new mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com --- arch/x86/kernel/apic/vector.c | 231 ++++++++++++++++------------------ include/linux/irq.h | 5 +- kernel/irq/internals.h | 2 + kernel/irq/migration.c | 10 ++ 4 files changed, 123 insertions(+), 125 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 736f62812f5c..72fa4bb78f0a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd, return err ? err : IRQ_SET_MASK_OK; } +static void free_moved_vector(struct apic_chip_data *apicd) +{ + unsigned int vector = apicd->prev_vector; + unsigned int cpu = apicd->prev_cpu; + bool managed = apicd->is_managed; + + /* + * Managed interrupts are usually not migrated away + * from an online CPU, but CPU isolation 'managed_irq' + * can make that happen. + * 1) Activation does not take the isolation into account + * to keep the code simple + * 2) Migration away from an isolated CPU can happen when + * a non-isolated CPU which is in the calculated + * affinity mask comes online. + */ + trace_vector_free_moved(apicd->irq, cpu, vector, managed); + irq_matrix_free(vector_matrix, cpu, vector, managed); + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + hlist_del_init(&apicd->clist); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; +} + +/* + * Called from fixup_irqs() with @desc->lock held and interrupts disabled. + */ +static void apic_force_complete_move(struct irq_data *irqd) +{ + unsigned int cpu = smp_processor_id(); + struct apic_chip_data *apicd; + unsigned int vector; + + guard(raw_spinlock)(&vector_lock); + apicd = apic_chip_data(irqd); + if (!apicd) + return; + + /* + * If prev_vector is empty or the descriptor is neither currently + * nor previously on the outgoing CPU no action required. + */ + vector = apicd->prev_vector; + if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) + return; + + /* + * This is tricky. If the cleanup of the old vector has not been + * done yet, then the following setaffinity call will fail with + * -EBUSY. This can leave the interrupt in a stale state. + * + * All CPUs are stuck in stop machine with interrupts disabled so + * calling __irq_complete_move() would be completely pointless. + * + * 1) The interrupt is in move_in_progress state. That means that we + * have not seen an interrupt since the io_apic was reprogrammed to + * the new vector. + * + * 2) The interrupt has fired on the new vector, but the cleanup IPIs + * have not been processed yet. + */ + if (apicd->move_in_progress) { + /* + * In theory there is a race: + * + * set_ioapic(new_vector) <-- Interrupt is raised before update + * is effective, i.e. it's raised on + * the old vector. + * + * So if the target cpu cannot handle that interrupt before + * the old vector is cleaned up, we get a spurious interrupt + * and in the worst case the ioapic irq line becomes stale. + * + * But in case of cpu hotplug this should be a non issue + * because if the affinity update happens right before all + * cpus rendezvous in stop machine, there is no way that the + * interrupt can be blocked on the target cpu because all cpus + * loops first with interrupts enabled in stop machine, so the + * old vector is not yet cleaned up when the interrupt fires. + * + * So the only way to run into this issue is if the delivery + * of the interrupt on the apic/system bus would be delayed + * beyond the point where the target cpu disables interrupts + * in stop machine. I doubt that it can happen, but at least + * there is a theoretical chance. Virtualization might be + * able to expose this, but AFAICT the IOAPIC emulation is not + * as stupid as the real hardware. + * + * Anyway, there is nothing we can do about that at this point + * w/o refactoring the whole fixup_irq() business completely. + * We print at least the irq number and the old vector number, + * so we have the necessary information when a problem in that + * area arises. + */ + pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", + irqd->irq, vector); + } + free_moved_vector(apicd); +} + #else -# define apic_set_affinity NULL +# define apic_set_affinity NULL +# define apic_force_complete_move NULL #endif static int apic_retrigger_irq(struct irq_data *irqd) @@ -923,39 +1024,16 @@ static void x86_vector_msi_compose_msg(struct irq_data *data, } static struct irq_chip lapic_controller = { - .name = "APIC", - .irq_ack = apic_ack_edge, - .irq_set_affinity = apic_set_affinity, - .irq_compose_msi_msg = x86_vector_msi_compose_msg, - .irq_retrigger = apic_retrigger_irq, + .name = "APIC", + .irq_ack = apic_ack_edge, + .irq_set_affinity = apic_set_affinity, + .irq_compose_msi_msg = x86_vector_msi_compose_msg, + .irq_force_complete_move = apic_force_complete_move, + .irq_retrigger = apic_retrigger_irq, }; #ifdef CONFIG_SMP -static void free_moved_vector(struct apic_chip_data *apicd) -{ - unsigned int vector = apicd->prev_vector; - unsigned int cpu = apicd->prev_cpu; - bool managed = apicd->is_managed; - - /* - * Managed interrupts are usually not migrated away - * from an online CPU, but CPU isolation 'managed_irq' - * can make that happen. - * 1) Activation does not take the isolation into account - * to keep the code simple - * 2) Migration away from an isolated CPU can happen when - * a non-isolated CPU which is in the calculated - * affinity mask comes online. - */ - trace_vector_free_moved(apicd->irq, cpu, vector, managed); - irq_matrix_free(vector_matrix, cpu, vector, managed); - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - hlist_del_init(&apicd->clist); - apicd->prev_vector = 0; - apicd->move_in_progress = 0; -} - static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) { struct apic_chip_data *apicd; @@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg) __vector_schedule_cleanup(apicd); } -/* - * Called from fixup_irqs() with @desc->lock held and interrupts disabled. - */ -void irq_force_complete_move(struct irq_desc *desc) -{ - unsigned int cpu = smp_processor_id(); - struct apic_chip_data *apicd; - struct irq_data *irqd; - unsigned int vector; - - /* - * The function is called for all descriptors regardless of which - * irqdomain they belong to. For example if an IRQ is provided by - * an irq_chip as part of a GPIO driver, the chip data for that - * descriptor is specific to the irq_chip in question. - * - * Check first that the chip_data is what we expect - * (apic_chip_data) before touching it any further. - */ - irqd = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqd) - return; - - raw_spin_lock(&vector_lock); - apicd = apic_chip_data(irqd); - if (!apicd) - goto unlock; - - /* - * If prev_vector is empty or the descriptor is neither currently - * nor previously on the outgoing CPU no action required. - */ - vector = apicd->prev_vector; - if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) - goto unlock; - - /* - * This is tricky. If the cleanup of the old vector has not been - * done yet, then the following setaffinity call will fail with - * -EBUSY. This can leave the interrupt in a stale state. - * - * All CPUs are stuck in stop machine with interrupts disabled so - * calling __irq_complete_move() would be completely pointless. - * - * 1) The interrupt is in move_in_progress state. That means that we - * have not seen an interrupt since the io_apic was reprogrammed to - * the new vector. - * - * 2) The interrupt has fired on the new vector, but the cleanup IPIs - * have not been processed yet. - */ - if (apicd->move_in_progress) { - /* - * In theory there is a race: - * - * set_ioapic(new_vector) <-- Interrupt is raised before update - * is effective, i.e. it's raised on - * the old vector. - * - * So if the target cpu cannot handle that interrupt before - * the old vector is cleaned up, we get a spurious interrupt - * and in the worst case the ioapic irq line becomes stale. - * - * But in case of cpu hotplug this should be a non issue - * because if the affinity update happens right before all - * cpus rendezvous in stop machine, there is no way that the - * interrupt can be blocked on the target cpu because all cpus - * loops first with interrupts enabled in stop machine, so the - * old vector is not yet cleaned up when the interrupt fires. - * - * So the only way to run into this issue is if the delivery - * of the interrupt on the apic/system bus would be delayed - * beyond the point where the target cpu disables interrupts - * in stop machine. I doubt that it can happen, but at least - * there is a theoretical chance. Virtualization might be - * able to expose this, but AFAICT the IOAPIC emulation is not - * as stupid as the real hardware. - * - * Anyway, there is nothing we can do about that at this point - * w/o refactoring the whole fixup_irq() business completely. - * We print at least the irq number and the old vector number, - * so we have the necessary information when a problem in that - * area arises. - */ - pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqd->irq, vector); - } - free_moved_vector(apicd); -unlock: - raw_spin_unlock(&vector_lock); -} - #ifdef CONFIG_HOTPLUG_CPU /* * Note, this is not accurate accounting, but at least good enough to diff --git a/include/linux/irq.h b/include/linux/irq.h index 8daa17f0107a..56f6583093d2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @ipi_send_mask: send an IPI to destination cpus in cpumask * @irq_nmi_setup: function called from core code before enabling an NMI * @irq_nmi_teardown: function called from core code after disabling an NMI + * @irq_force_complete_move: optional function to force complete pending irq move * @flags: chip specific flags */ struct irq_chip { @@ -537,6 +538,8 @@ struct irq_chip { int (*irq_nmi_setup)(struct irq_data *data); void (*irq_nmi_teardown)(struct irq_data *data); + void (*irq_force_complete_move)(struct irq_data *data); + unsigned long flags; }; @@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data) __irq_move_irq(data); } void irq_move_masked_irq(struct irq_data *data); -void irq_force_complete_move(struct irq_desc *desc); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } -static inline void irq_force_complete_move(struct irq_desc *desc) { } #endif extern int no_irq_affinity; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index a979523640d0..d4e190e690bd 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) return desc->pending_mask; } bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); +void irq_force_complete_move(struct irq_desc *desc); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) { @@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; } +static inline void irq_force_complete_move(struct irq_desc *desc) { } #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index eb150afd671f..e110300ad650 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) return true; } +void irq_force_complete_move(struct irq_desc *desc) +{ + for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) { + if (d->chip && d->chip->irq_force_complete_move) { + d->chip->irq_force_complete_move(d); + return; + } + } +} + void irq_move_masked_irq(struct irq_data *idata) { struct irq_desc *desc = irq_data_to_desc(idata); -- 2.51.0 From e54b1b5e89ae765e6d71d41883a8f551fde8d0ab Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:51 +0530 Subject: [PATCH 08/16] genirq: Introduce irq_can_move_in_process_context() Interrupt controller drivers which enable CONFIG_GENERIC_PENDING_IRQ require to know whether an interrupt can be moved in process context or not to decide whether they need to invoke the work around for non-atomic MSI updates or not. This information can be retrieved via irq_can_move_pcntxt(). That helper requires access to the top-most interrupt domain data, but the driver which requires this is usually further down in the hierarchy. Introduce irq_can_move_in_process_context() which retrieves that information from the top-most interrupt domain data. [ tglx: Massaged change log ] Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-6-apatel@ventanamicro.com --- include/linux/irq.h | 2 ++ kernel/irq/migration.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 56f6583093d2..dd5df1e2d032 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -615,6 +615,7 @@ extern int irq_affinity_online_cpu(unsigned int cpu); #endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) +bool irq_can_move_in_process_context(struct irq_data *data); void __irq_move_irq(struct irq_data *data); static inline void irq_move_irq(struct irq_data *data) { @@ -623,6 +624,7 @@ static inline void irq_move_irq(struct irq_data *data) } void irq_move_masked_irq(struct irq_data *data); #else +static inline bool irq_can_move_in_process_context(struct irq_data *data) { return true; } static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } #endif diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index e110300ad650..147cabb4c077 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -127,3 +127,13 @@ void __irq_move_irq(struct irq_data *idata) if (!masked) idata->chip->irq_unmask(idata); } + +bool irq_can_move_in_process_context(struct irq_data *data) +{ + /* + * Get the top level irq_data in the hierarchy, which is optimized + * away when CONFIG_IRQ_DOMAIN_HIERARCHY is disabled. + */ + data = irq_desc_get_irq_data(irq_data_to_desc(data)); + return irq_can_move_pcntxt(data); +} -- 2.51.0 From 58d868b67a9ac0db477f714939f21849db5f5178 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:52 +0530 Subject: [PATCH 09/16] RISC-V: Select CONFIG_GENERIC_PENDING_IRQ Enable CONFIG_GENERIC_PENDING_IRQ for RISC-V so that RISC-V interrupt chips can support delayed interrupt mirgration in interrupt context. Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-7-apatel@ventanamicro.com --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..a32f39748775 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -111,6 +111,7 @@ config RISCV select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED + select GENERIC_PENDING_IRQ if SMP select GENERIC_PCI_IOMAP select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK -- 2.51.0 From 0f67911e821c67ecfccc365a2103ce276a9a56fe Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:53 +0530 Subject: [PATCH 10/16] irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector Currently, there is only one "move" pointer in struct imsic_vector so during vector movement the old vector points to the new vector and new vector points to itself. To support forced cleanup of the old vector, add separate "move_next" and "move_prev" pointers to struct imsic_vector, where during vector movement the "move_next" pointer of the old vector points to the new vector and the "move_prev" pointer of the new vector points to the old vector. Both "move_next" and "move_prev" pointers are cleared separately by __imsic_local_sync() with a restriction that "move_prev" on the new CPU is cleared only after the old CPU has cleared "move_next". Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-8-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-imsic-early.c | 8 ++- drivers/irqchip/irq-riscv-imsic-state.c | 96 +++++++++++++++++-------- drivers/irqchip/irq-riscv-imsic-state.h | 7 +- 3 files changed, 78 insertions(+), 33 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f..b5def6268936 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -77,6 +77,12 @@ static void imsic_handle_irq(struct irq_desc *desc) struct imsic_vector *vec; unsigned long local_id; + /* + * Process pending local synchronization instead of waiting + * for per-CPU local timer to expire. + */ + imsic_local_sync_all(false); + chained_irq_enter(chip, desc); while ((local_id = csr_swap(CSR_TOPEI, 0))) { @@ -120,7 +126,7 @@ static int imsic_starting_cpu(unsigned int cpu) * Interrupts identities might have been enabled/disabled while * this CPU was not running so sync-up local enable/disable state. */ - imsic_local_sync_all(); + imsic_local_sync_all(true); /* Enable local interrupt delivery */ imsic_local_delivery(true); diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index b97e6cd89ed7..1aeba76d7279 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -124,10 +124,11 @@ void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, } } -static void __imsic_local_sync(struct imsic_local_priv *lpriv) +static bool __imsic_local_sync(struct imsic_local_priv *lpriv) { struct imsic_local_config *mlocal; struct imsic_vector *vec, *mvec; + bool ret = true; int i; lockdep_assert_held(&lpriv->lock); @@ -143,35 +144,75 @@ static void __imsic_local_sync(struct imsic_local_priv *lpriv) __imsic_id_clear_enable(i); /* - * If the ID was being moved to a new ID on some other CPU - * then we can get a MSI during the movement so check the - * ID pending bit and re-trigger the new ID on other CPU - * using MMIO write. + * Clear the previous vector pointer of the new vector only + * after the movement is complete on the old CPU. */ - mvec = READ_ONCE(vec->move); - WRITE_ONCE(vec->move, NULL); - if (mvec && mvec != vec) { + mvec = READ_ONCE(vec->move_prev); + if (mvec) { + /* + * If the old vector has not been updated then + * try again in the next sync-up call. + */ + if (READ_ONCE(mvec->move_next)) { + ret = false; + continue; + } + + WRITE_ONCE(vec->move_prev, NULL); + } + + /* + * If a vector was being moved to a new vector on some other + * CPU then we can get a MSI during the movement so check the + * ID pending bit and re-trigger the new ID on other CPU using + * MMIO write. + */ + mvec = READ_ONCE(vec->move_next); + if (mvec) { if (__imsic_id_read_clear_pending(i)) { mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu); writel_relaxed(mvec->local_id, mlocal->msi_va); } + WRITE_ONCE(vec->move_next, NULL); imsic_vector_free(&lpriv->vectors[i]); } skip: bitmap_clear(lpriv->dirty_bitmap, i, 1); } + + return ret; } -void imsic_local_sync_all(void) +#ifdef CONFIG_SMP +static void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +{ + lockdep_assert_held(&lpriv->lock); + + if (!timer_pending(&lpriv->timer)) { + lpriv->timer.expires = jiffies + 1; + add_timer_on(&lpriv->timer, smp_processor_id()); + } +} +#else +static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +{ +} +#endif + +void imsic_local_sync_all(bool force_all) { struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv); unsigned long flags; raw_spin_lock_irqsave(&lpriv->lock, flags); - bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); - __imsic_local_sync(lpriv); + + if (force_all) + bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); + if (!__imsic_local_sync(lpriv)) + __imsic_local_timer_start(lpriv); + raw_spin_unlock_irqrestore(&lpriv->lock, flags); } @@ -190,12 +231,7 @@ void imsic_local_delivery(bool enable) #ifdef CONFIG_SMP static void imsic_local_timer_callback(struct timer_list *timer) { - struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv); - unsigned long flags; - - raw_spin_lock_irqsave(&lpriv->lock, flags); - __imsic_local_sync(lpriv); - raw_spin_unlock_irqrestore(&lpriv->lock, flags); + imsic_local_sync_all(false); } static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu) @@ -216,14 +252,11 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu */ if (cpu_online(cpu)) { if (cpu == smp_processor_id()) { - __imsic_local_sync(lpriv); - return; + if (__imsic_local_sync(lpriv)) + return; } - if (!timer_pending(&lpriv->timer)) { - lpriv->timer.expires = jiffies + 1; - add_timer_on(&lpriv->timer, cpu); - } + __imsic_local_timer_start(lpriv); } } #else @@ -278,8 +311,9 @@ void imsic_vector_unmask(struct imsic_vector *vec) raw_spin_unlock(&lpriv->lock); } -static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec, - bool new_enable, struct imsic_vector *new_move) +static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, + struct imsic_vector *vec, bool is_old_vec, + bool new_enable, struct imsic_vector *move_vec) { unsigned long flags; bool enabled; @@ -289,7 +323,10 @@ static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsi /* Update enable and move details */ enabled = READ_ONCE(vec->enable); WRITE_ONCE(vec->enable, new_enable); - WRITE_ONCE(vec->move, new_move); + if (is_old_vec) + WRITE_ONCE(vec->move_next, move_vec); + else + WRITE_ONCE(vec->move_prev, move_vec); /* Mark the vector as dirty and synchronize */ bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1); @@ -322,8 +359,8 @@ void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_ve * interrupt on the old vector while device was being moved * to the new vector. */ - enabled = imsic_vector_move_update(old_lpriv, old_vec, false, new_vec); - imsic_vector_move_update(new_lpriv, new_vec, enabled, new_vec); + enabled = imsic_vector_move_update(old_lpriv, old_vec, true, false, new_vec); + imsic_vector_move_update(new_lpriv, new_vec, false, enabled, old_vec); } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS @@ -386,7 +423,8 @@ struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask vec = &lpriv->vectors[local_id]; vec->hwirq = hwirq; vec->enable = false; - vec->move = NULL; + vec->move_next = NULL; + vec->move_prev = NULL; return vec; } diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h index 391e44280827..f02842b84ed5 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.h +++ b/drivers/irqchip/irq-riscv-imsic-state.h @@ -23,7 +23,8 @@ struct imsic_vector { unsigned int hwirq; /* Details accessed using local lock held */ bool enable; - struct imsic_vector *move; + struct imsic_vector *move_next; + struct imsic_vector *move_prev; }; struct imsic_local_priv { @@ -74,7 +75,7 @@ static inline void __imsic_id_clear_enable(unsigned long id) __imsic_eix_update(id, 1, false, false); } -void imsic_local_sync_all(void); +void imsic_local_sync_all(bool force_all); void imsic_local_delivery(bool enable); void imsic_vector_mask(struct imsic_vector *vec); @@ -87,7 +88,7 @@ static inline bool imsic_vector_isenabled(struct imsic_vector *vec) static inline struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec) { - return READ_ONCE(vec->move); + return READ_ONCE(vec->move_prev); } void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec); -- 2.51.0 From 51611130d57d2061729010bd0575701aa4b7ff74 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:54 +0530 Subject: [PATCH 11/16] irqchip/riscv-imsic: Implement irq_force_complete_move() for IMSIC Implement irq_force_complete_move() for IMSIC driver so that in-flight vector movements on a CPU can be cleaned-up when the CPU goes down. Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-9-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-imsic-platform.c | 32 ++++++++++++++++++++++ drivers/irqchip/irq-riscv-imsic-state.c | 17 ++++++++++++ drivers/irqchip/irq-riscv-imsic-state.h | 1 + 3 files changed, 50 insertions(+) diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index 9a5e7b4541f6..b9e3f9030bdf 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -129,6 +129,37 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask return IRQ_SET_MASK_OK_DONE; } + +static void imsic_irq_force_complete_move(struct irq_data *d) +{ + struct imsic_vector *mvec, *vec = irq_data_get_irq_chip_data(d); + unsigned int cpu = smp_processor_id(); + + if (WARN_ON(!vec)) + return; + + /* Do nothing if there is no in-flight move */ + mvec = imsic_vector_get_move(vec); + if (!mvec) + return; + + /* Do nothing if the old IMSIC vector does not belong to current CPU */ + if (mvec->cpu != cpu) + return; + + /* + * The best we can do is force cleanup the old IMSIC vector. + * + * The challenges over here are same as x86 vector domain so + * refer to the comments in irq_force_complete_move() function + * implemented at arch/x86/kernel/apic/vector.c. + */ + + /* Force cleanup in-flight move */ + pr_info("IRQ fixup: irq %d move in progress, old vector cpu %d local_id %d\n", + d->irq, mvec->cpu, mvec->local_id); + imsic_vector_force_move_cleanup(vec); +} #endif static struct irq_chip imsic_irq_base_chip = { @@ -137,6 +168,7 @@ static struct irq_chip imsic_irq_base_chip = { .irq_unmask = imsic_irq_unmask, #ifdef CONFIG_SMP .irq_set_affinity = imsic_irq_set_affinity, + .irq_force_complete_move = imsic_irq_force_complete_move, #endif .irq_retrigger = imsic_irq_retrigger, .irq_compose_msi_msg = imsic_irq_compose_msg, diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index 1aeba76d7279..eb0a9b6a5239 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -311,6 +311,23 @@ void imsic_vector_unmask(struct imsic_vector *vec) raw_spin_unlock(&lpriv->lock); } +void imsic_vector_force_move_cleanup(struct imsic_vector *vec) +{ + struct imsic_local_priv *lpriv; + struct imsic_vector *mvec; + unsigned long flags; + + lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu); + raw_spin_lock_irqsave(&lpriv->lock, flags); + + mvec = READ_ONCE(vec->move_prev); + WRITE_ONCE(vec->move_prev, NULL); + if (mvec) + imsic_vector_free(mvec); + + raw_spin_unlock_irqrestore(&lpriv->lock, flags); +} + static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec, bool is_old_vec, bool new_enable, struct imsic_vector *move_vec) diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h index f02842b84ed5..19dea0c77738 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.h +++ b/drivers/irqchip/irq-riscv-imsic-state.h @@ -91,6 +91,7 @@ static inline struct imsic_vector *imsic_vector_get_move(struct imsic_vector *ve return READ_ONCE(vec->move_prev); } +void imsic_vector_force_move_cleanup(struct imsic_vector *vec); void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec); struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id); -- 2.51.0 From 0bd55080ba9e3c16719f75006fd85b932c85f2f4 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:55 +0530 Subject: [PATCH 12/16] irqchip/riscv-imsic: Avoid interrupt translation in interrupt handler Currently, imsic_handle_irq() uses generic_handle_domain_irq() to handle the interrupt, which internally has an extra step of resolving hwirq using domain. Avoid the translation step by replacing the hardware interrupt number with the Linux interrupt number in the IMSIC vector data and directly call generic_handle_irq(). Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-10-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-imsic-early.c | 6 ++---- drivers/irqchip/irq-riscv-imsic-platform.c | 2 +- drivers/irqchip/irq-riscv-imsic-state.c | 8 ++++---- drivers/irqchip/irq-riscv-imsic-state.h | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index b5def6268936..d2e8ed70d396 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -73,7 +73,7 @@ static int __init imsic_ipi_domain_init(void) { return 0; } static void imsic_handle_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - int err, cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct imsic_vector *vec; unsigned long local_id; @@ -103,9 +103,7 @@ static void imsic_handle_irq(struct irq_desc *desc) continue; } - err = generic_handle_domain_irq(imsic->base_domain, vec->hwirq); - if (unlikely(err)) - pr_warn_ratelimited("hwirq 0x%x mapping not found\n", vec->hwirq); + generic_handle_irq(vec->irq); } chained_irq_exit(chip, desc); diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index b9e3f9030bdf..6bf5d63f614e 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -111,7 +111,7 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask return -EBUSY; /* Get a new vector on the desired set of CPUs */ - new_vec = imsic_vector_alloc(old_vec->hwirq, mask_val); + new_vec = imsic_vector_alloc(old_vec->irq, mask_val); if (!new_vec) return -ENOSPC; diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index eb0a9b6a5239..b0849af700c3 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -422,7 +422,7 @@ struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int l return &lpriv->vectors[local_id]; } -struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask) +struct imsic_vector *imsic_vector_alloc(unsigned int irq, const struct cpumask *mask) { struct imsic_vector *vec = NULL; struct imsic_local_priv *lpriv; @@ -438,7 +438,7 @@ struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask lpriv = per_cpu_ptr(imsic->lpriv, cpu); vec = &lpriv->vectors[local_id]; - vec->hwirq = hwirq; + vec->irq = irq; vec->enable = false; vec->move_next = NULL; vec->move_prev = NULL; @@ -451,7 +451,7 @@ void imsic_vector_free(struct imsic_vector *vec) unsigned long flags; raw_spin_lock_irqsave(&imsic->matrix_lock, flags); - vec->hwirq = UINT_MAX; + vec->irq = 0; irq_matrix_free(imsic->matrix, vec->cpu, vec->local_id, false); raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags); } @@ -510,7 +510,7 @@ static int __init imsic_local_init(void) vec = &lpriv->vectors[i]; vec->cpu = cpu; vec->local_id = i; - vec->hwirq = UINT_MAX; + vec->irq = 0; } } diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h index 19dea0c77738..3202ffa4e849 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.h +++ b/drivers/irqchip/irq-riscv-imsic-state.h @@ -20,7 +20,7 @@ struct imsic_vector { unsigned int cpu; unsigned int local_id; /* Details saved by driver in the vector */ - unsigned int hwirq; + unsigned int irq; /* Details accessed using local lock held */ bool enable; struct imsic_vector *move_next; @@ -96,7 +96,7 @@ void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_ve struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id); -struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask); +struct imsic_vector *imsic_vector_alloc(unsigned int irq, const struct cpumask *mask); void imsic_vector_free(struct imsic_vector *vector); void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind); -- 2.51.0 From 896f8e436f9951fa9ef68dab0a3d399ec3a6e1d7 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Feb 2025 14:26:56 +0530 Subject: [PATCH 13/16] irqchip/riscv-imsic: Special handling for non-atomic device MSI update Devices, which have a non-atomic MSI update, might see an intermediate state when changing the target IMSIC vector from one CPU to another. To avoid losing interrupts due to this intermediate state, do the following just like x86 APIC: 1) First write a temporary IMSIC vector to the device which has the same MSI address as the old IMSIC vector and MSI data pointing to the new IMSIC vector. 2) Next write the new IMSIC vector to the device. Based on the above, the __imsic_local_sync() must check pending status of both old MSI data and new MSI data on the old CPU. In addition, the movement of IMSIC vector for non-atomic device MSI update must be done in interrupt context using IRQCHIP_MOVE_DEFERRED. Implememnt the logic and enforce the chip flag for PCI/MSI[X]. Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-11-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-imsic-platform.c | 87 +++++++++++++++++++--- drivers/irqchip/irq-riscv-imsic-state.c | 30 +++++++- 2 files changed, 102 insertions(+), 15 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index 6bf5d63f614e..b8ae67c25b37 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -64,6 +64,11 @@ static int imsic_irq_retrigger(struct irq_data *d) return 0; } +static void imsic_irq_ack(struct irq_data *d) +{ + irq_move_irq(d); +} + static void imsic_irq_compose_vector_msg(struct imsic_vector *vec, struct msi_msg *msg) { phys_addr_t msi_addr; @@ -97,6 +102,21 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask bool force) { struct imsic_vector *old_vec, *new_vec; + struct imsic_vector tmp_vec; + + /* + * Requirements for the downstream irqdomains (or devices): + * + * 1) Downstream irqdomains (or devices) with atomic MSI update can + * happily do imsic_irq_set_affinity() in the process-context on + * any CPU so the irqchip of such irqdomains must not set the + * IRQCHIP_MOVE_DEFERRED flag. + * + * 2) Downstream irqdomains (or devices) with non-atomic MSI update + * must use imsic_irq_set_affinity() in nterrupt-context upon + * the next device interrupt so the irqchip of such irqdomains + * must set the IRQCHIP_MOVE_DEFERRED flag. + */ old_vec = irq_data_get_irq_chip_data(d); if (WARN_ON(!old_vec)) @@ -115,6 +135,32 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask if (!new_vec) return -ENOSPC; + /* + * Device having non-atomic MSI update might see an intermediate + * state when changing target IMSIC vector from one CPU to another. + * + * To avoid losing interrupt to such intermediate state, do the + * following (just like x86 APIC): + * + * 1) First write a temporary IMSIC vector to the device which + * has MSI address same as the old IMSIC vector but MSI data + * matches the new IMSIC vector. + * + * 2) Next write the new IMSIC vector to the device. + * + * Based on the above, __imsic_local_sync() must check pending + * status of both old MSI data and new MSI data on the old CPU. + */ + if (!irq_can_move_in_process_context(d) && + new_vec->local_id != old_vec->local_id) { + /* Setup temporary vector */ + tmp_vec.cpu = old_vec->cpu; + tmp_vec.local_id = new_vec->local_id; + + /* Point device to the temporary vector */ + imsic_msi_update_msg(irq_get_irq_data(d->irq), &tmp_vec); + } + /* Point device to the new vector */ imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec); @@ -163,17 +209,17 @@ static void imsic_irq_force_complete_move(struct irq_data *d) #endif static struct irq_chip imsic_irq_base_chip = { - .name = "IMSIC", - .irq_mask = imsic_irq_mask, - .irq_unmask = imsic_irq_unmask, + .name = "IMSIC", + .irq_mask = imsic_irq_mask, + .irq_unmask = imsic_irq_unmask, #ifdef CONFIG_SMP - .irq_set_affinity = imsic_irq_set_affinity, - .irq_force_complete_move = imsic_irq_force_complete_move, + .irq_set_affinity = imsic_irq_set_affinity, + .irq_force_complete_move = imsic_irq_force_complete_move, #endif - .irq_retrigger = imsic_irq_retrigger, - .irq_compose_msi_msg = imsic_irq_compose_msg, - .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, + .irq_retrigger = imsic_irq_retrigger, + .irq_ack = imsic_irq_ack, + .irq_compose_msi_msg = imsic_irq_compose_msg, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, }; static int imsic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, @@ -190,7 +236,7 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, return -ENOSPC; irq_domain_set_info(domain, virq, virq, &imsic_irq_base_chip, vec, - handle_simple_irq, NULL, NULL); + handle_edge_irq, NULL, NULL); irq_set_noprobe(virq); irq_set_affinity(virq, cpu_online_mask); irq_data_update_effective_affinity(irq_get_irq_data(virq), cpumask_of(vec->cpu)); @@ -229,15 +275,34 @@ static const struct irq_domain_ops imsic_base_domain_ops = { #endif }; +static bool imsic_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + switch (info->bus_token) { + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + info->chip->flags |= IRQCHIP_MOVE_DEFERRED; + break; + default: + break; + } + + return true; +} + static const struct msi_parent_ops imsic_msi_parent_ops = { .supported_flags = MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX, .required_flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_PCI_MSI_MASK_PARENT, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, - .init_dev_msi_info = msi_lib_init_dev_msi_info, + .init_dev_msi_info = imsic_init_dev_msi_info, }; int imsic_irqdomain_init(void) diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index b0849af700c3..bdf5cd2037f2 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -126,8 +126,8 @@ void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, static bool __imsic_local_sync(struct imsic_local_priv *lpriv) { - struct imsic_local_config *mlocal; - struct imsic_vector *vec, *mvec; + struct imsic_local_config *tlocal, *mlocal; + struct imsic_vector *vec, *tvec, *mvec; bool ret = true; int i; @@ -169,13 +169,35 @@ static bool __imsic_local_sync(struct imsic_local_priv *lpriv) */ mvec = READ_ONCE(vec->move_next); if (mvec) { - if (__imsic_id_read_clear_pending(i)) { + /* + * Devices having non-atomic MSI update might see + * an intermediate state so check both old ID and + * new ID for pending interrupts. + * + * For details, see imsic_irq_set_affinity(). + */ + tvec = vec->local_id == mvec->local_id ? + NULL : &lpriv->vectors[mvec->local_id]; + + if (tvec && !irq_can_move_in_process_context(irq_get_irq_data(vec->irq)) && + __imsic_id_read_clear_pending(tvec->local_id)) { + /* Retrigger temporary vector if it was already in-use */ + if (READ_ONCE(tvec->enable)) { + tlocal = per_cpu_ptr(imsic->global.local, tvec->cpu); + writel_relaxed(tvec->local_id, tlocal->msi_va); + } + + mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu); + writel_relaxed(mvec->local_id, mlocal->msi_va); + } + + if (__imsic_id_read_clear_pending(vec->local_id)) { mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu); writel_relaxed(mvec->local_id, mlocal->msi_va); } WRITE_ONCE(vec->move_next, NULL); - imsic_vector_free(&lpriv->vectors[i]); + imsic_vector_free(vec); } skip: -- 2.51.0 From 2d81e1bb625238d40a686ed909ff3e1abab7556a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 17 Feb 2025 01:16:32 +0300 Subject: [PATCH 14/16] irqchip/gic-v3: Add Rockchip 3568002 erratum workaround Rockchip RK3566/RK3568 GIC600 integration has DDR addressing limited to the first 32bit of physical address space. Rockchip assigned Erratum ID #3568002 for this issue. Add driver quirk for this Rockchip GIC Erratum. Note, that the 0x0201743b GIC600 ID is not Rockchip-specific and is common for many ARM GICv3 implementations. Hence, there is an extra of_machine_is_compatible() check. Signed-off-by: Dmitry Osipenko Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20250216221634.364158-2-dmitry.osipenko@collabora.com --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 9 ++++++++ drivers/irqchip/irq-gic-v3-its.c | 23 ++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index f074f6219f5c..f968c13b46a7 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -284,6 +284,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 | +----------------+-----------------+-----------------+-----------------------------+ +| Rockchip | RK3568 | #3568002 | ROCKCHIP_ERRATUM_3568002 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fcdd0ed3eca8..0507e476f69f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1303,6 +1303,15 @@ config NVIDIA_CARMEL_CNP_ERRATUM If unsure, say Y. +config ROCKCHIP_ERRATUM_3568002 + bool "Rockchip 3568002: GIC600 can not access physical addresses higher than 4GB" + default y + help + The Rockchip RK3566 and RK3568 GIC600 SoC integrations have AXI + addressing limited to the first 32bit of physical address space. + + If unsure, say Y. + config ROCKCHIP_ERRATUM_3588001 bool "Rockchip 3588001: GIC600 can not support shareability attributes" default y diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 8c3ec5734f1e..f30ed281882f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -205,13 +205,15 @@ static DEFINE_IDA(its_vpeid_ida); #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) #define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) +static gfp_t gfp_flags_quirk; + static struct page *its_alloc_pages_node(int node, gfp_t gfp, unsigned int order) { struct page *page; int ret = 0; - page = alloc_pages_node(node, gfp, order); + page = alloc_pages_node(node, gfp | gfp_flags_quirk, order); if (!page) return NULL; @@ -4887,6 +4889,17 @@ static bool __maybe_unused its_enable_quirk_hip09_162100801(void *data) return true; } +static bool __maybe_unused its_enable_rk3568002(void *data) +{ + if (!of_machine_is_compatible("rockchip,rk3566") && + !of_machine_is_compatible("rockchip,rk3568")) + return false; + + gfp_flags_quirk |= GFP_DMA32; + + return true; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -4954,6 +4967,14 @@ static const struct gic_quirk its_quirks[] = { .property = "dma-noncoherent", .init = its_set_non_coherent, }, +#ifdef CONFIG_ROCKCHIP_ERRATUM_3568002 + { + .desc = "ITS: Rockchip erratum RK3568002", + .iidr = 0x0201743b, + .mask = 0xffffffff, + .init = its_enable_rk3568002, + }, +#endif { } }; -- 2.51.0 From f15be3d4a0a55db2b50f319c378a2d16ceb21f86 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 17 Feb 2025 01:16:33 +0300 Subject: [PATCH 15/16] arm64: dts: rockchip: rk356x: Add MSI controller node Rockchip 356x SoC's GIC has two hardware integration issues that affect MSI functionality of the GIC. Previously, both these GIC issues were worked around by using MBI for MSI instead of ITS because kernel GIC driver didn't have necessary quirks. First issue is about RK356x GIC not supporting programmable shareability, while reporting it as supported in a GIC's feature register. Rockchip assigned Erratum ID #3568001 for this issue. This patch adds dma-noncoherent property to the GIC node, denoting that a SW workaround is required for mitigating the issue. Second issue is about GIC AXI master interface addressing limited to the first 4GB of physical address space. Rockchip assigned Erratum ID #3568002 for this issue. Now that kernel supports quirks for both of the erratums, add MSI controller node to RK356x device-tree. Signed-off-by: Dmitry Osipenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250216221634.364158-3-dmitry.osipenko@collabora.com --- arch/arm64/boot/dts/rockchip/rk356x-base.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi index e55390629114..89ba0d7f47f7 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi @@ -284,6 +284,18 @@ mbi-alias = <0x0 0xfd410000>; mbi-ranges = <296 24>; msi-controller; + ranges; + #address-cells = <2>; + #size-cells = <2>; + dma-noncoherent; + + its: msi-controller@fd440000 { + compatible = "arm,gic-v3-its"; + reg = <0x0 0xfd440000 0 0x20000>; + dma-noncoherent; + msi-controller; + #msi-cells = <1>; + }; }; usb_host0_ehci: usb@fd800000 { -- 2.51.0 From b956c9de91757c9478e24fc9f6a57fd46f0a49f0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 17 Feb 2025 01:16:34 +0300 Subject: [PATCH 16/16] arm64: dts: rockchip: rk356x: Move PCIe MSI to use GIC ITS instead of MBI Rockchip 356x device-tree now supports GIC ITS. Move PCIe controller's MSI to use ITS instead of MBI. This removes extra CPU overhead of handling PCIe MBIs by letting GIC's ITS to serve the PCIe MSIs. Signed-off-by: Dmitry Osipenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250216221634.364158-4-dmitry.osipenko@collabora.com --- arch/arm64/boot/dts/rockchip/rk356x-base.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi index 89ba0d7f47f7..4e730aecf84d 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi @@ -969,7 +969,7 @@ num-ib-windows = <6>; num-ob-windows = <2>; max-link-speed = <2>; - msi-map = <0x0 &gic 0x0 0x1000>; + msi-map = <0x0 &its 0x0 0x1000>; num-lanes = <1>; phys = <&combphy2 PHY_TYPE_PCIE>; phy-names = "pcie-phy"; -- 2.51.0