From 4670f7bc17dfce649a1877c72643c133ef9055ab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 6 Dec 2024 14:52:35 +0100 Subject: [PATCH 01/16] s390/mm/hugetlbfs: Remove huge_pte_none() / huge_pte_none_mostly() Slightly cleanup arch/s390/include/asm/hugetlb.h: - Remove huge_pte_none() / huge_pte_none_mostly() which are identical to the generic variants - Coding style adjustments Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/hugetlb.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index a40664b236e9..7c52acaf9f82 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -20,12 +20,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte); + #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -56,6 +57,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte, int dirty) { int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); + if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); @@ -68,19 +70,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); -} -#define __HAVE_ARCH_HUGE_PTE_NONE -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP -- 2.51.0 From 4ec6054e7321dc24ebccaa08b3af0d590f5666e6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:28 +0100 Subject: [PATCH 02/16] s390/pci: Report PCI error recovery results via SCLP Add a mechanism with which the status of PCI error recovery runs is reported to the platform. Together with the status supply additional information that may aid in problem determination. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/sclp.h | 33 +++++++++++ arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci_event.c | 21 +++++-- arch/s390/pci/pci_report.c | 111 +++++++++++++++++++++++++++++++++++ arch/s390/pci/pci_report.h | 16 +++++ drivers/s390/char/sclp.h | 14 ----- drivers/s390/char/sclp_pci.c | 19 ------ 7 files changed, 178 insertions(+), 38 deletions(-) create mode 100644 arch/s390/pci/pci_report.c create mode 100644 arch/s390/pci/pci_report.h diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..3267631b5adc 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -16,6 +16,11 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) +#define SCLP_ERRNOTIFY_AQ_RESET 0 +#define SCLP_ERRNOTIFY_AQ_REPAIR 1 +#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 +#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 + #ifndef __ASSEMBLY__ #include #include @@ -111,6 +116,34 @@ struct sclp_info { }; extern struct sclp_info sclp; +struct sccb_header { + u16 length; + u8 function_code; + u8 control_mask[3]; + u16 response_code; +} __packed; + +struct evbuf_header { + u16 length; + u8 type; + u8 flags; + u16 _reserved; +} __packed; + +struct err_notify_evbuf { + struct evbuf_header header; + u8 action; + u8 atype; + u32 fh; + u32 fid; + u8 data[]; +} __packed; + +struct err_notify_sccb { + struct sccb_header header; + struct err_notify_evbuf evbuf; +} __packed; + struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 2c21f0394c9a..df73c5182990 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o + pci_bus.o pci_kvm_hook.o pci_report.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7f7b732b3f3e..7bd7721c1239 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -16,6 +16,7 @@ #include #include "pci_bus.h" +#include "pci_report.h" /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { @@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) { pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + char *status_str = "success"; struct pci_driver *driver; /* @@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); + status_str = "failed (pass-through)"; goto out_unlock; } driver = to_pci_driver(pdev->dev.driver); if (!is_driver_supported(driver)) { - if (!driver) + if (!driver) { pr_info("%s: Cannot be recovered because no driver is bound to the device\n", pci_name(pdev)); - else + status_str = "failed (no driver)"; + } else { pr_info("%s: The %s driver bound to the device does not support error recovery\n", pci_name(pdev), driver->name); + status_str = "failed (no driver support)"; + } goto out_unlock; } ers_res = zpci_event_notify_error_detected(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on detection)"; goto out_unlock; + } if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { ers_res = zpci_event_do_error_state_clear(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on MMIO enable)"; goto out_unlock; + } } if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); + status_str = "failed (driver can't recover)"; goto out_unlock; } @@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); out_unlock: pci_dev_unlock(pdev); + zpci_report_status(zdev, "recovery", status_str); return ers_res; } diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c new file mode 100644 index 000000000000..2754c9c161f5 --- /dev/null +++ b/arch/s390/pci/pci_report.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include + +#include + +#include "pci_report.h" + +#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714 + +struct zpci_report_error_data { + u64 timestamp; + u64 err_log_id; + char log_data[]; +} __packed; + +#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb)) +#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data)) + +struct zpci_report_error { + struct zpci_report_error_header header; + struct zpci_report_error_data data; +} __packed; + +static const char *zpci_state_str(pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_normal: + return "normal"; + case pci_channel_io_frozen: + return "frozen"; + case pci_channel_io_perm_failure: + return "permanent-failure"; + default: + return "invalid"; + }; +} + +/** + * zpci_report_status - Report the status of operations on a PCI device + * @zdev: The PCI device for which to report status + * @operation: A string representing the operation reported + * @status: A string representing the status of the operation + * + * This function creates a human readable report about an operation such as + * PCI device recovery and forwards this to the platform using the SCLP Write + * Event Data mechanism. Besides the operation and status strings the report + * also contains additional information about the device deemed useful for + * debug such as the currently bound device driver, if any, and error state. + * + * Return: 0 on success an error code < 0 otherwise. + */ +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status) +{ + struct zpci_report_error *report; + struct pci_driver *driver = NULL; + struct pci_dev *pdev = NULL; + char *buf, *end; + int ret; + + if (!zdev || !zdev->zbus) + return -ENODEV; + + /* Protected virtualization hosts get nothing from us */ + if (prot_virt_guest) + return -ENODATA; + + report = (void *)get_zeroed_page(GFP_KERNEL); + if (!report) + return -ENOMEM; + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + driver = to_pci_driver(pdev->dev.driver); + + buf = report->data.log_data; + end = report->data.log_data + ZPCI_REPORT_DATA_SIZE; + buf += scnprintf(buf, end - buf, "report: %s\n", operation); + buf += scnprintf(buf, end - buf, "status: %s\n", status); + buf += scnprintf(buf, end - buf, "state: %s\n", + (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); + buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + + report->header.version = 1; + report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; + report->header.length = buf - (char *)&report->data; + report->data.timestamp = ktime_get_clocktai_seconds(); + report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT; + + ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid); + if (ret) + pr_err("Reporting PCI status failed with code %d\n", ret); + else + pr_info("Reported PCI device status\n"); + + free_page((unsigned long)report); + + return ret; +} diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h new file mode 100644 index 000000000000..e08003d51a97 --- /dev/null +++ b/arch/s390/pci/pci_report.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle + * + */ +#ifndef __S390_PCI_REPORT_H +#define __S390_PCI_REPORT_H + +struct zpci_dev; + +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status); + +#endif /* __S390_PCI_REPORT_H */ diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6c91e422927f..73731fa2594e 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -85,13 +85,6 @@ typedef unsigned int sclp_cmdw_t; typedef u64 sccb_mask_t; -struct sccb_header { - u16 length; - u8 function_code; - u8 control_mask[3]; - u16 response_code; -} __attribute__((packed)); - struct init_sccb { struct sccb_header header; u16 _reserved; @@ -238,13 +231,6 @@ struct gds_vector { u16 gds_id; } __attribute__((packed)); -struct evbuf_header { - u16 length; - u8 type; - u8 flags; - u16 _reserved; -} __attribute__((packed)); - struct sclp_req { struct list_head list; /* list_head for request queueing. */ sclp_cmdw_t command; /* sclp command to execute */ diff --git a/drivers/s390/char/sclp_pci.c b/drivers/s390/char/sclp_pci.c index c3466a8c56bb..56400886f7fc 100644 --- a/drivers/s390/char/sclp_pci.c +++ b/drivers/s390/char/sclp_pci.c @@ -24,30 +24,11 @@ #define SCLP_ATYPE_PCI 2 -#define SCLP_ERRNOTIFY_AQ_RESET 0 -#define SCLP_ERRNOTIFY_AQ_REPAIR 1 -#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 -#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 - static DEFINE_MUTEX(sclp_pci_mutex); static struct sclp_register sclp_pci_event = { .send_mask = EVTYP_ERRNOTIFY_MASK, }; -struct err_notify_evbuf { - struct evbuf_header header; - u8 action; - u8 atype; - u32 fh; - u32 fid; - u8 data[]; -} __packed; - -struct err_notify_sccb { - struct sccb_header header; - struct err_notify_evbuf evbuf; -} __packed; - struct pci_cfg_sccb { struct sccb_header header; u8 atype; /* adapter type */ -- 2.51.0 From 7832b3047d10e2c1f9cfed49de818a38aea251f6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:29 +0100 Subject: [PATCH 03/16] s390/debug: Simplify and document debug_next_entry() logic Contrary to convention debug_next_entry() returns a falsy 0 value if there are more entries and a truthy 1 value when there are no more entries. As there is only one caller just reverse this logic to be less surprising and document the behavior in a kdoc comment. Also replace the goto with an early return. In the future this allows using it in a do {} while (debug_next_entry(...)) loop. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index de19fd8a6a95..e3598dbccd24 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -422,11 +422,17 @@ out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -434,10 +440,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -450,10 +456,9 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; } /* @@ -495,7 +500,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } -- 2.51.0 From 460c52a57f83f0cb510ba04ac8263e1ee95b2d66 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:30 +0100 Subject: [PATCH 04/16] s390/debug: Split private data alloc/free out of file operations Split the allocation respectively freeing of file_private_info_t out of open() respectively close(). This will be used in a follow on change to access to debug views without going through the s390dbf filesystem. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 78 ++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index e3598dbccd24..463c9a19a3b5 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -535,6 +535,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -543,7 +579,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -561,42 +597,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. */ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -607,10 +627,8 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } -- 2.51.0 From 5f952dae48d034b0736593ba98b5aef84038522b Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:31 +0100 Subject: [PATCH 05/16] s390/debug: Add debug_dump() to write debug view to a string buffer The debug_dump() function allows to get the content of a debug log and view pair in a string buffer. One future application of this is to provide debug logs to the platform to be collected with hardware error logs during recovery. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 3 +++ arch/s390/kernel/debug.c | 47 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a7f7bdc9e19c..ec30f6f421a7 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -114,6 +114,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size); + void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 463c9a19a3b5..2040b96d4cb3 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -632,6 +632,53 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_next_entry(p_info)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} + /* Create debugfs entries and add to internal list. */ static void _debug_register(debug_info_t *id) { -- 2.51.0 From dc18c81a57e75c2abfd826164600b5b4f96f5fd9 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:32 +0100 Subject: [PATCH 06/16] s390/debug: Add a reverse mode for debug_dump() In this mode debug_dump() writes the debug log starting at the newest entry followed by earlier entries. To this end add a debug_prev_entry() helper analogous to debug_next_entry() a helper to get the latest entry which is one before the active entry and a helper to iterate either forward or backward. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 2 +- arch/s390/kernel/debug.c | 87 ++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ec30f6f421a7..aa0995780c10 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -115,7 +115,7 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, gid_t gid); ssize_t debug_dump(debug_info_t *id, struct debug_view *view, - char *buf, size_t buf_size); + char *buf, size_t buf_size, bool reverse); void debug_unregister(debug_info_t *id); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 2040b96d4cb3..61c393e806b2 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -354,7 +355,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -461,6 +465,84 @@ static inline bool debug_next_entry(file_private_info_t *p_info) return true; } +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); +} + /* * debug_output: * - called for user read() @@ -638,6 +720,7 @@ static int debug_close(struct inode *inode, struct file *file) * @view: View with which to dump the debug information * @buf: Buffer the textual debug data representation is written to * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry * * This function may be used whenever a textual representation of the debug * information is required without using an s390dbf file. @@ -650,7 +733,7 @@ static int debug_close(struct inode *inode, struct file *file) * On failure an error code less than 0 is returned. */ ssize_t debug_dump(debug_info_t *id, struct debug_view *view, - char *buf, size_t buf_size) + char *buf, size_t buf_size, bool reverse) { file_private_info_t *p_info; size_t size, offset = 0; @@ -672,7 +755,7 @@ ssize_t debug_dump(debug_info_t *id, struct debug_view *view, offset += size; if (offset >= buf_size) break; - } while (debug_next_entry(p_info)); + } while (debug_move_entry(p_info, reverse)); debug_file_private_free(p_info); buf[offset] = '\0'; -- 2.51.0 From 4c41a48f5f3ecd3b963cd3820d2ea41d9a8d6516 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:33 +0100 Subject: [PATCH 07/16] s390/pci: Add pci_msg debug view to PCI report Using the newly introduced debug_dump() mechanism add formatted content of pci_debug_msg_id to the PCI report. The formatting is based on the existing sprintf format but removes caller pointer and area index and adds an column header. This will allow the platform to collect this log data together with hardware errors. This sets the reverse flag such that the newest log entries get added to the PCI report even if not all debug log entries fit. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 4 +++ arch/s390/kernel/debug.c | 8 +++--- arch/s390/pci/pci_report.c | 47 +++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index aa0995780c10..6375276d94ea 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf, size_t out_buf_size); +#define DEBUG_SPRINTF_MAX_ARGS 10 +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *inbuf); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t *prolog_proc; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 61c393e806b2..ba6b7329a10e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -95,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, size_t out_buf_size, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, - const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -1685,8 +1682,8 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1723,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c index 2754c9c161f5..1b494e5ecc4d 100644 --- a/arch/s390/pci/pci_report.c +++ b/arch/s390/pci/pci_report.c @@ -15,6 +15,8 @@ #include #include +#include +#include #include "pci_report.h" @@ -48,6 +50,44 @@ static const char *zpci_state_str(pci_channel_state_t state) }; } +static int debug_log_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) +{ + unsigned long sec, usec; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ", + sec, usec, level, except_str, + entry->cpu); + return rc; +} + +static int debug_prolog_header(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) +{ + return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n"); +} + +static struct debug_view debug_log_view = { + "pci_msg_log", + &debug_prolog_header, + &debug_log_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; + /** * zpci_report_status - Report the status of operations on a PCI device * @zdev: The PCI device for which to report status @@ -59,6 +99,8 @@ static const char *zpci_state_str(pci_channel_state_t state) * Event Data mechanism. Besides the operation and status strings the report * also contains additional information about the device deemed useful for * debug such as the currently bound device driver, if any, and error state. + * Additionally a string representation of pci_debug_msg_id, or as much as fits, + * is also included. * * Return: 0 on success an error code < 0 otherwise. */ @@ -92,6 +134,11 @@ int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char buf += scnprintf(buf, end - buf, "state: %s\n", (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true); + if (ret < 0) + pr_err("Reading PCI debug messages failed with code %d\n", ret); + else + buf += ret; report->header.version = 1; report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; -- 2.51.0 From 62b87e0c9a2cb35a74b47766743135e175f488b4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:16 +0100 Subject: [PATCH 08/16] s390/mm: Remove incorrect comment Remove an outdated comment that is also located at a random place. The generic statement that read permissions imply execute permissions is wrong since the instruction execution-protection facility is available. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3b21aecf69ac..235360314f67 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -461,13 +461,6 @@ static inline int is_module_addr(void *addr) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY) -/* - * On s390 the page table entry has an invalid bit and a read-only bit. - * Read permission implies execute permission and write permission - * implies read permission. - */ - /*xwr*/ - /* * Segment entry (large page) protection definitions. */ -- 2.51.0 From db449b147cef0a210c61dc8840424456fbe45cc8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:17 +0100 Subject: [PATCH 09/16] s390/mm: Remove unused PAGE_KERNEL_EXEC and friends Remove unused PAGE_KERNEL_EXEC, SEGMENT_KERNEL_EXEC, and REGION3_KERNEL_EXEC. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 235360314f67..889974fc9810 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -458,8 +458,6 @@ static inline int is_module_addr(void *addr) _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) #define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_YOUNG | _PAGE_DIRTY) /* * Segment entry (large page) protection definitions. @@ -494,12 +492,6 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ - _SEGMENT_ENTRY_LARGE | \ - _SEGMENT_ENTRY_READ | \ - _SEGMENT_ENTRY_WRITE | \ - _SEGMENT_ENTRY_YOUNG | \ - _SEGMENT_ENTRY_DIRTY) /* * Region3 entry (large page) protection definitions. @@ -520,13 +512,6 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ - _REGION3_ENTRY_DIRTY) static inline bool mm_p4d_folded(struct mm_struct *mm) { -- 2.51.0 From f8107a8be0b2e92798ded4aff45e3a94b763ce61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:18 +0100 Subject: [PATCH 10/16] s390/mm: Simplify noexec page protection handling By default page protection definitions like PAGE_RX have the _PAGE_NOEXEC bit set. For older machines without the instruction execution protection facility this bit is not allowed to be used in page table entries, and therefore must be removed. This is done at a couple of page table walkers, but also at some but not all page table modification functions like ptep_modify_prot_commit(). Avoid all of this and change the page, segment and region3 protection definitions so that the noexec bit is masked out automatically if the instruction execution-protection facility is not available. This is similar to what also various other architectures do which had to solve the same problem. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 1 - arch/s390/boot/startup.c | 13 ++++- arch/s390/boot/vmem.c | 8 --- arch/s390/include/asm/pgtable.h | 93 +++++++++++++++++++++------------ arch/s390/kernel/setup.c | 1 + arch/s390/mm/init.c | 9 ++++ arch/s390/mm/mmap.c | 42 ++++++++------- arch/s390/mm/pageattr.c | 6 --- arch/s390/mm/pgtable.c | 2 - arch/s390/mm/vmem.c | 8 --- 10 files changed, 106 insertions(+), 77 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 7521a9d75fa2..56244fe78182 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -13,7 +13,6 @@ struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; - unsigned char has_nx : 1; }; struct vmlinux_info { diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index abe6e6c0ab98..e00aed22d0d4 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,9 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -51,8 +54,14 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!test_facility(130)) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } } static int cmma_test_essa(void) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 00a3c59ac218..a0c97cde5146 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -67,8 +67,6 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -294,8 +292,6 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e continue; entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -320,8 +316,6 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (can_large_pmd(pmd, addr, next, mode)) { entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -353,8 +347,6 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (can_large_pud(pud, addr, next, mode)) { entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 889974fc9810..a3b51056a177 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -124,6 +124,8 @@ static inline int is_module_addr(void *addr) #define KASLR_LEN 0UL #endif +void setup_protection_map(void); + /* * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | @@ -442,76 +444,107 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) +#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_INVALID | _PAGE_PROTECT) - -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ +#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) +extern unsigned long page_noexec_mask; + +#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask) + +#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE) +#define PAGE_RO __pgprot_page_mask(__PAGE_RO) +#define PAGE_RX __pgprot_page_mask(__PAGE_RX) +#define PAGE_RW __pgprot_page_mask(__PAGE_RW) +#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX) +#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED) +#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO) + /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) -#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_DIRTY | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) +extern unsigned long segment_noexec_mask; + +#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask) + +#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE) +#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO) +#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX) +#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW) +#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX) +#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL) +#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO) + /* * Region3 entry (large page) protection definitions. */ -#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_YOUNG | \ - _REGION_ENTRY_PROTECT | \ - _REGION_ENTRY_NOEXEC) +#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT | \ + _REGION_ENTRY_NOEXEC) + +extern unsigned long region_noexec_mask; + +#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask) + +#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL) +#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO) static inline bool mm_p4d_folded(struct mm_struct *mm) { @@ -1412,8 +1445,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) pte_t __pte; __pte = __pte(physpage | pgprot_val(pgprot)); - if (!MACHINE_HAS_NX) - __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1781,8 +1812,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!MACHINE_HAS_NX) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 95324aaa17e2..0ce550faf073 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -971,6 +971,7 @@ void __init setup_arch(char **cmdline_p) if (test_facility(193)) static_branch_enable(&cpu_has_bear); + setup_protection_map(); /* * Create kernel page tables. */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7a96623a9d2e..f2298f7a3f21 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); struct ctlreg __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata_preserved(page_noexec_mask); +EXPORT_SYMBOL(page_noexec_mask); + +unsigned long __bootdata_preserved(segment_noexec_mask); +EXPORT_SYMBOL(segment_noexec_mask); + +unsigned long __bootdata_preserved(region_noexec_mask); +EXPORT_SYMBOL(region_noexec_mask); + unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 33f3504be90b..76f376876e0d 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -196,22 +196,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } } -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_RO, - [VM_WRITE] = PAGE_RO, - [VM_WRITE | VM_READ] = PAGE_RO, - [VM_EXEC] = PAGE_RX, - [VM_EXEC | VM_READ] = PAGE_RX, - [VM_EXEC | VM_WRITE] = PAGE_RX, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_RO, - [VM_SHARED | VM_WRITE] = PAGE_RW, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW, - [VM_SHARED | VM_EXEC] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX, - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX -}; +static pgprot_t protection_map[16] __ro_after_init; + +void __init setup_protection_map(void) +{ + pgprot_t *pm = protection_map; + + pm[VM_NONE] = PAGE_NONE; + pm[VM_READ] = PAGE_RO; + pm[VM_WRITE] = PAGE_RO; + pm[VM_WRITE | VM_READ] = PAGE_RO; + pm[VM_EXEC] = PAGE_RX; + pm[VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_EXEC | VM_WRITE] = PAGE_RX; + pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX; + pm[VM_SHARED] = PAGE_NONE; + pm[VM_SHARED | VM_READ] = PAGE_RO; + pm[VM_SHARED | VM_WRITE] = PAGE_RW; + pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW; + pm[VM_SHARED | VM_EXEC] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX; + pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX; +} + DECLARE_VM_GET_PAGE_PROT diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8f56a21a077f..eae97fb61712 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -109,8 +109,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, } else if (flags & SET_MEMORY_DEF) { new = __pte(pte_val(new) & PAGE_MASK); new = set_pte_bit(new, PAGE_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; @@ -167,8 +165,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pmd(pmd_val(new) & PMD_MASK); new = set_pmd_bit(new, SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -256,8 +252,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pud(pud_val(new) & PUD_MASK); new = set_pud_bit(new, REGION3_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index cea5dba80468..f05e62e037c2 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -360,8 +360,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste_t pgste; struct mm_struct *mm = vma->vm_mm; - if (!MACHINE_HAS_NX) - pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 665b8228afeb..7c684c54e721 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_t *pte; prot = pgprot_val(PAGE_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_PAGE_NOEXEC; - pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (!add) { @@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pte_t *pte; prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_SEGMENT_ENTRY_NOEXEC; - pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); @@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, pmd_t *pmd; prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_REGION_ENTRY_NOEXEC; pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); -- 2.51.0 From 807e39ed4da2cd7e6fe2c38a8d41d36f4c59e861 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 12 Dec 2024 10:55:03 +0100 Subject: [PATCH 11/16] s390/lib: Use exrl instead of ex in string functions exrl is present in all machines currently supported in the linux kernel, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/lib/mem.S | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 08f60a42b9a6..d026debf250c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -83,8 +82,7 @@ SYM_FUNC_START(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -102,8 +100,7 @@ SYM_FUNC_START(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) -- 2.51.0 From 2478d43ed621c4d15549142c0cfe6fa4a05e5f10 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:18 +0100 Subject: [PATCH 12/16] s390/diag: Create misc device /dev/diag Create a misc device /dev/diag to fetch diagnose specific information from the kernel and provide it to userspace. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/diag/Makefile | 1 + arch/s390/kernel/diag/diag_misc.c | 45 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 arch/s390/kernel/diag/Makefile create mode 100644 arch/s390/kernel/diag/diag_misc.c diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..15bfa866c44d --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..cd5b78880d7d --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 2024 + */ + +#include +#include +#include +#include +#include +#include + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); -- 2.51.0 From 90e6f191e1ee094d13faae5fc29af7b5baf9d1b8 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:19 +0100 Subject: [PATCH 13/16] s390/diag324: Retrieve power readings via diag 0x324 Retrieve electrical power readings for resources in a computing environment via diag 0x324. diag 0x324 stores the power readings in the power information block (pib). Provide power readings from pib via diag324 ioctl interface. diag324 ioctl provides new pib to the user only if the threshold time has passed since the last call. Otherwise, cache data is returned. When there are no active readers, cleanup of pib buffer is performed. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/sclp.h | 1 + arch/s390/include/uapi/asm/diag.h | 24 ++++ arch/s390/kernel/Makefile | 1 + arch/s390/kernel/diag.c | 1 + arch/s390/kernel/diag/Makefile | 2 +- arch/s390/kernel/diag/diag324.c | 224 +++++++++++++++++++++++++++++ arch/s390/kernel/diag/diag_ioctl.h | 10 ++ arch/s390/kernel/diag/diag_misc.c | 9 ++ drivers/s390/char/sclp.h | 4 +- drivers/s390/char/sclp_early.c | 2 + 11 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/uapi/asm/diag.h create mode 100644 arch/s390/kernel/diag/diag324.c create mode 100644 arch/s390/kernel/diag/diag_ioctl.h diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index e1316e181230..c1050f4a7107 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -38,6 +38,7 @@ enum diag_stat_enum { DIAG_STAT_X308, DIAG_STAT_X318, DIAG_STAT_X320, + DIAG_STAT_X324, DIAG_STAT_X49C, DIAG_STAT_X500, NR_DIAG_STAT diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..de369a062545 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -89,6 +89,7 @@ struct sclp_info { unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; + unsigned char has_diag324 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h new file mode 100644 index 000000000000..361c9ba9d4f5 --- /dev/null +++ b/arch/s390/include/uapi/asm/diag.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Diag ioctls and its associated structures definitions. + * + * Copyright IBM Corp. 2024 + */ + +#ifndef __S390_UAPI_ASM_DIAG_H +#define __S390_UAPI_ASM_DIAG_H + +#include + +#define DIAG_MAGIC_STR 'D' + +struct diag324_pib { + __u64 address; + __u64 sequence; +}; + +/* Diag ioctl definitions */ +#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) +#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) + +#endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..887450df44bd 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -44,6 +44,7 @@ obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ extra-y += vmlinux.lds diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index cdd6e31344fa..d786b9e58a82 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -53,6 +53,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile index 15bfa866c44d..6083c1fd2f4b 100644 --- a/arch/s390/kernel/diag/Makefile +++ b/arch/s390/kernel/diag/Makefile @@ -1 +1 @@ -obj-y := diag_misc.o +obj-y := diag_misc.o diag324.o diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..de5365b23e11 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c index cd5b78880d7d..1f4f5c757329 100644 --- a/arch/s390/kernel/diag/diag_misc.c +++ b/arch/s390/kernel/diag/diag_misc.c @@ -12,11 +12,20 @@ #include #include +#include +#include "diag_ioctl.h" + static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { long rc; switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; default: rc = -ENOIOCTLCMD; break; diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6c91e422927f..847674ca94c3 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -196,7 +196,9 @@ struct read_info_sccb { u8 byte_134; /* 134 */ u8 cpudirq; /* 135 */ u16 cbl; /* 136-137 */ - u8 _pad_138[EXT_SCCB_READ_SCP - 138]; + u8 byte_138; /* 138 */ + u8 byte_139; /* 139 */ + u8 _pad_140[EXT_SCCB_READ_SCP - 140]; } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 29156455970e..b7a55ea6c77a 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -64,6 +64,8 @@ static void __init sclp_early_facilities_detect(void) sclp.has_sipl = !!(sccb->cbl & 0x4000); sclp.has_sipl_eckd = !!(sccb->cbl & 0x2000); } + if (sccb->cpuoff > 139) + sclp.has_diag324 = !!(sccb->byte_139 & 0x80); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; -- 2.51.0 From 388cf16d90f6cce74ce531f306e2c77d6d66f2f7 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:20 +0100 Subject: [PATCH 14/16] s390/diag: Move diag.c to diag specific folder Move implementation of s390 diagnose code to diag specific folder. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/diag/Makefile | 2 +- arch/s390/kernel/{ => diag}/diag.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename arch/s390/kernel/{ => diag}/diag.c (99%) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 887450df44bd..db5f3a3faefb 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,7 +38,7 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile index 6083c1fd2f4b..44d3e0471151 100644 --- a/arch/s390/kernel/diag/Makefile +++ b/arch/s390/kernel/diag/Makefile @@ -1 +1 @@ -obj-y := diag_misc.o diag324.o +obj-y := diag_misc.o diag324.o diag.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c similarity index 99% rename from arch/s390/kernel/diag.c rename to arch/s390/kernel/diag/diag.c index d786b9e58a82..91161f09299f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include #include #include -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; -- 2.51.0 From bc3d4402a09cf072a56945e615fc587f8c1bec04 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 23 Mar 2023 17:40:41 +0100 Subject: [PATCH 15/16] s390/qdio: Rename feature flag aif_osa to aif_qdio This feature is not only utilized by OSA, but by QDIO in general. Clear up possible confusions. Signed-off-by: Benjamin Block Reviewed-by: Steffen Maier Acked-by: Alexandra Winter Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/css_chars.h | 2 +- drivers/s390/cio/qdio.h | 2 +- drivers/s390/cio/qdio_setup.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 638137d46c85..a03f64033760 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -25,7 +25,7 @@ struct css_general_char { u64 : 2; u64 : 3; - u64 aif_osa : 1; /* bit 67 */ + u64 aif_qdio : 1;/* bit 67 */ u64 : 12; u64 eadm_rf : 1; /* bit 80 */ u64 : 1; diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 641f0dbb65a9..5c09f2ef874e 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -266,7 +266,7 @@ struct qdio_irq { #define is_thinint_irq(irq) \ (irq->qib.qfmt == QDIO_IQDIO_QFMT || \ - css_general_characteristics.aif_osa) + css_general_characteristics.aif_qdio) #define qperf(__qdev, __attr) ((__qdev)->perf_stat.(__attr)) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 99c0fd23022d..1f532b112194 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -423,7 +423,7 @@ int __init qdio_setup_init(void) /* Check for OSA/FCP thin interrupts (bit 67). */ DBF_EVENT("thinint:%1d", - (css_general_characteristics.aif_osa) ? 1 : 0); + (css_general_characteristics.aif_qdio) ? 1 : 0); /* Check for QEBSM support in general (bit 58). */ DBF_EVENT("cssQEBSM:%1d", css_general_characteristics.qebsm); -- 2.51.0 From efd34db6e6813c6e573f34353ce4ad5e81f56dbd Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 4 Apr 2023 20:33:59 +0200 Subject: [PATCH 16/16] s390/cio: Use array indices instead of pointer arithmetic ccw_device_get_ciw() already uses array indices to iterate over the vector of CIWs, but then switches to pointer arithmetic when returning the one it found. Change this to make it more consistent. Signed-off-by: Benjamin Block Reviewed-by: Vineeth Vijayan Signed-off-by: Alexander Gordeev --- drivers/s390/cio/device_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index acd6790dba4d..61c07b4a0fe8 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -445,7 +445,7 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct) return NULL; for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++) if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct) - return cdev->private->dma_area->senseid.ciw + ciw_cnt; + return &cdev->private->dma_area->senseid.ciw[ciw_cnt]; return NULL; } -- 2.51.0