From: Tony Luck Date: Tue, 28 Dec 2010 22:25:21 +0000 (-0800) Subject: pstore: new filesystem interface to platform persistent storage X-Git-Tag: v2.6.39-rc1~465^2^2~2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=ca01d6dd2d7a2652000307520777538740efc286;p=users%2Fhch%2Fdma-mapping.git pstore: new filesystem interface to platform persistent storage Some platforms have a small amount of non-volatile storage that can be used to store information useful to diagnose the cause of a system crash. This is the generic part of a file system interface that presents information from the crash as a series of files in /dev/pstore. Once the information has been seen, the underlying storage is freed by deleting the files. Signed-off-by: Tony Luck --- diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore new file mode 100644 index 000000000000..f1fb2a004264 --- /dev/null +++ b/Documentation/ABI/testing/pstore @@ -0,0 +1,35 @@ +Where: /dev/pstore/... +Date: January 2011 +Kernel Version: 2.6.38 +Contact: tony.luck@intel.com +Description: Generic interface to platform dependent persistent storage. + + Platforms that provide a mechanism to preserve some data + across system reboots can register with this driver to + provide a generic interface to show records captured in + the dying moments. In the case of a panic the last part + of the console log is captured, but other interesting + data can also be saved. + + # mount -t pstore - /dev/pstore + + $ ls -l /dev/pstore + total 0 + -r--r--r-- 1 root root 7896 Nov 30 15:38 dmesg-erst-1 + + Different users of this interface will result in different + filename prefixes. Currently two are defined: + + "dmesg" - saved console log + "mce" - architecture dependent data from fatal h/w error + + Once the information in a file has been read, removing + the file will signal to the underlying persistent storage + device that it can reclaim the space for later re-use. + + $ rm /dev/pstore/dmesg-erst-1 + + The expectation is that all files in /dev/pstore + will be saved elsewhere and erased from persistent store + soon after boot to free up space ready for the next + catastrophe. diff --git a/Documentation/ABI/testing/sysfs-fs-pstore b/Documentation/ABI/testing/sysfs-fs-pstore new file mode 100644 index 000000000000..8e659d854805 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-pstore @@ -0,0 +1,7 @@ +What: /sys/fs/pstore/kmsg_bytes +Date: January 2011 +Kernel Version: 2.6.38 +Contact: "Tony Luck" +Description: + Controls amount of console log that will be saved + to persistent store on oops/panic. diff --git a/fs/Kconfig b/fs/Kconfig index 771f457402d4..2bbe47fec1ec 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -188,6 +188,7 @@ source "fs/omfs/Kconfig" source "fs/hpfs/Kconfig" source "fs/qnx4/Kconfig" source "fs/romfs/Kconfig" +source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index a7f7cef0c0c8..db71a5b21a4f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -121,3 +121,4 @@ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ +obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig new file mode 100644 index 000000000000..867d0ac026ce --- /dev/null +++ b/fs/pstore/Kconfig @@ -0,0 +1,13 @@ +config PSTORE + bool "Persistant store support" + default n + help + This option enables generic access to platform level + persistent storage via "pstore" filesystem that can + be mounted as /dev/pstore. Only useful if you have + a platform level driver that registers with pstore to + provide the data, so you probably should just go say "Y" + (or "M") to a platform specific persistent store driver + (e.g. ACPI_APEI on X86) which will select this for you. + If you don't have a platform persistent store driver, + say N. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile new file mode 100644 index 000000000000..760f4bce7d1d --- /dev/null +++ b/fs/pstore/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux pstorefs routines. +# + +obj-y += pstore.o + +pstore-objs += inode.o platform.o diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c new file mode 100644 index 000000000000..0e806aafe857 --- /dev/null +++ b/fs/pstore/inode.c @@ -0,0 +1,280 @@ +/* + * Persistent Storage - ramfs parts. + * + * Copyright (C) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define PSTORE_NAMELEN 64 + +struct pstore_private { + u64 id; + int (*erase)(u64); +}; + +#define pstore_get_inode ramfs_get_inode + +/* + * When a file is unlinked from our file system we call the + * platform driver to erase the record from persistent store. + */ +static int pstore_unlink(struct inode *dir, struct dentry *dentry) +{ + struct pstore_private *p = dentry->d_inode->i_private; + + p->erase(p->id); + kfree(p); + + return simple_unlink(dir, dentry); +} + +static const struct inode_operations pstore_dir_inode_operations = { + .lookup = simple_lookup, + .unlink = pstore_unlink, +}; + +static const struct super_operations pstore_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .show_options = generic_show_options, +}; + +static struct super_block *pstore_sb; +static struct vfsmount *pstore_mnt; + +int pstore_is_mounted(void) +{ + return pstore_mnt != NULL; +} + +/* + * Set up a file structure as if we had opened this file and + * write our data to it. + */ +static int pstore_writefile(struct inode *inode, struct dentry *dentry, + char *data, size_t size) +{ + struct file f; + ssize_t n; + mm_segment_t old_fs = get_fs(); + + memset(&f, '0', sizeof f); + f.f_mapping = inode->i_mapping; + f.f_path.dentry = dentry; + f.f_path.mnt = pstore_mnt; + f.f_pos = 0; + f.f_op = inode->i_fop; + set_fs(KERNEL_DS); + n = do_sync_write(&f, data, size, &f.f_pos); + set_fs(old_fs); + + fsnotify_modify(&f); + + return n == size; +} + +/* + * Make a regular file in the root directory of our file system. + * Load it up with "size" bytes of data from "buf". + * Set the mtime & ctime to the date that this record was originally stored. + */ +int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, + char *data, size_t size, + struct timespec time, int (*erase)(u64)) +{ + struct dentry *root = pstore_sb->s_root; + struct dentry *dentry; + struct inode *inode; + int rc; + char name[PSTORE_NAMELEN]; + struct pstore_private *private; + + rc = -ENOMEM; + inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); + if (!inode) + goto fail; + inode->i_uid = inode->i_gid = 0; + private = kmalloc(sizeof *private, GFP_KERNEL); + if (!private) + goto fail_alloc; + private->id = id; + private->erase = erase; + + switch (type) { + case PSTORE_TYPE_DMESG: + sprintf(name, "dmesg-%s-%lld", psname, id); + break; + case PSTORE_TYPE_MCE: + sprintf(name, "mce-%s-%lld", psname, id); + break; + case PSTORE_TYPE_UNKNOWN: + sprintf(name, "unknown-%s-%lld", psname, id); + break; + default: + sprintf(name, "type%d-%s-%lld", type, psname, id); + break; + } + + mutex_lock(&root->d_inode->i_mutex); + + rc = -ENOSPC; + dentry = d_alloc_name(root, name); + if (IS_ERR(dentry)) + goto fail_lockedalloc; + + d_add(dentry, inode); + + mutex_unlock(&root->d_inode->i_mutex); + + if (!pstore_writefile(inode, dentry, data, size)) + goto fail_write; + + inode->i_private = private; + + if (time.tv_sec) + inode->i_mtime = inode->i_ctime = time; + + return 0; + +fail_write: + kfree(private); + inode->i_nlink--; + mutex_lock(&root->d_inode->i_mutex); + d_delete(dentry); + dput(dentry); + mutex_unlock(&root->d_inode->i_mutex); + goto fail; + +fail_lockedalloc: + mutex_unlock(&root->d_inode->i_mutex); + kfree(private); +fail_alloc: + iput(inode); + +fail: + return rc; +} + +int pstore_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode = NULL; + struct dentry *root; + int err; + + save_mount_options(sb, data); + + pstore_sb = sb; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = PSTOREFS_MAGIC; + sb->s_op = &pstore_ops; + sb->s_time_gran = 1; + + inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); + if (!inode) { + err = -ENOMEM; + goto fail; + } + /* override ramfs "dir" options so we catch unlink(2) */ + inode->i_op = &pstore_dir_inode_operations; + + root = d_alloc_root(inode); + sb->s_root = root; + if (!root) { + err = -ENOMEM; + goto fail; + } + + pstore_get_records(); + + return 0; +fail: + iput(inode); + return err; +} + +static int pstore_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct dentry *root; + + root = mount_nodev(fs_type, flags, data, pstore_fill_super); + if (IS_ERR(root)) + return -ENOMEM; + + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + pstore_mnt = mnt; + + return 0; +} + +static void pstore_kill_sb(struct super_block *sb) +{ + kill_litter_super(sb); + pstore_sb = NULL; + pstore_mnt = NULL; +} + +static struct file_system_type pstore_fs_type = { + .name = "pstore", + .get_sb = pstore_get_sb, + .kill_sb = pstore_kill_sb, +}; + +static int __init init_pstore_fs(void) +{ + int ret = 0; + struct kobject *pstorefs_kobj; + + pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj); + if (!pstorefs_kobj) + return -ENOMEM; + + sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + + ret = register_filesystem(&pstore_fs_type); + + if (ret) { + sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + kobject_put(pstorefs_kobj); + } + + return ret; +} +module_init(init_pstore_fs) + +MODULE_AUTHOR("Tony Luck "); +MODULE_LICENSE("GPL"); diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h new file mode 100644 index 000000000000..76c26d2fab29 --- /dev/null +++ b/fs/pstore/internal.h @@ -0,0 +1,7 @@ +extern void pstore_get_records(void); +extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, + char *data, size_t size, + struct timespec time, int (*erase)(u64)); +extern int pstore_is_mounted(void); + +extern struct kobj_attribute pstore_kmsg_bytes_attr; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c new file mode 100644 index 000000000000..705fdf8abf6e --- /dev/null +++ b/fs/pstore/platform.c @@ -0,0 +1,202 @@ +/* + * Persistent Storage - platform driver interface parts. + * + * Copyright (C) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * pstore_lock just protects "psinfo" during + * calls to pstore_register() + */ +static DEFINE_SPINLOCK(pstore_lock); +static struct pstore_info *psinfo; + +/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */ +static unsigned long kmsg_bytes = 10240; + +static ssize_t b_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes); +} + +static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + return (sscanf(buf, "%lu", &kmsg_bytes) > 0) ? count : 0; +} + +struct kobj_attribute pstore_kmsg_bytes_attr = + __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store); + +/* Tag each group of saved records with a sequence number */ +static int oopscount; + +/* + * callback from kmsg_dump. (s2,l2) has the most recently + * written bytes, older bytes are in (s1,l1). Save as much + * as we can from the end of the buffer. + */ +static void pstore_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason, + const char *s1, unsigned long l1, + const char *s2, unsigned long l2) +{ + unsigned long s1_start, s2_start; + unsigned long l1_cpy, l2_cpy; + unsigned long size, total = 0; + char *dst; + u64 id; + int hsize, part = 1; + + mutex_lock(&psinfo->buf_mutex); + oopscount++; + while (total < kmsg_bytes) { + dst = psinfo->buf; + hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++); + size = psinfo->bufsize - hsize; + dst += hsize; + + l2_cpy = min(l2, size); + l1_cpy = min(l1, size - l2_cpy); + + if (l1_cpy + l2_cpy == 0) + break; + + s2_start = l2 - l2_cpy; + s1_start = l1 - l1_cpy; + + memcpy(dst, s1 + s1_start, l1_cpy); + memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); + + id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); + if (pstore_is_mounted()) + pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, + psinfo->buf, hsize + l1_cpy + l2_cpy, + CURRENT_TIME, psinfo->erase); + l1 -= l1_cpy; + l2 -= l2_cpy; + total += l1_cpy + l2_cpy; + } + mutex_unlock(&psinfo->buf_mutex); +} + +static struct kmsg_dumper pstore_dumper = { + .dump = pstore_dump, +}; + +/* + * platform specific persistent storage driver registers with + * us here. If pstore is already mounted, call the platform + * read function right away to populate the file system. If not + * then the pstore mount code will call us later to fill out + * the file system. + * + * Register with kmsg_dump to save last part of console log on panic. + */ +int pstore_register(struct pstore_info *psi) +{ + struct module *owner = psi->owner; + + spin_lock(&pstore_lock); + if (psinfo) { + spin_unlock(&pstore_lock); + return -EBUSY; + } + psinfo = psi; + spin_unlock(&pstore_lock); + + if (owner && !try_module_get(owner)) { + psinfo = NULL; + return -EINVAL; + } + + if (pstore_is_mounted()) + pstore_get_records(); + + kmsg_dump_register(&pstore_dumper); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_register); + +/* + * Read all the records from the persistent store. Create and + * file files in our filesystem. + */ +void pstore_get_records(void) +{ + struct pstore_info *psi = psinfo; + size_t size; + u64 id; + enum pstore_type_id type; + struct timespec time; + int failed = 0; + + if (!psi) + return; + + mutex_lock(&psinfo->buf_mutex); + while ((size = psi->read(&id, &type, &time)) > 0) { + if (pstore_mkfile(type, psi->name, id, psi->buf, size, + time, psi->erase)) + failed++; + } + mutex_unlock(&psinfo->buf_mutex); + + if (failed) + printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n", + failed, psi->name); +} + +/* + * Call platform driver to write a record to the + * persistent store. + */ +int pstore_write(enum pstore_type_id type, char *buf, size_t size) +{ + u64 id; + + if (!psinfo) + return -ENODEV; + + if (size > psinfo->bufsize) + return -EFBIG; + + mutex_lock(&psinfo->buf_mutex); + memcpy(psinfo->buf, buf, size); + id = psinfo->write(type, size); + if (pstore_is_mounted()) + pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, + size, CURRENT_TIME, psinfo->erase); + mutex_unlock(&psinfo->buf_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_write); diff --git a/include/linux/magic.h b/include/linux/magic.h index ff690d05f129..e87fd5ac3e5e 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -26,6 +26,7 @@ #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 #define ANON_INODE_FS_MAGIC 0x09041934 +#define PSTOREFS_MAGIC 0x6165676C #define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ #define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ diff --git a/include/linux/pstore.h b/include/linux/pstore.h new file mode 100644 index 000000000000..41977737bb7d --- /dev/null +++ b/include/linux/pstore.h @@ -0,0 +1,60 @@ +/* + * Persistent Storage - pstore.h + * + * Copyright (C) 2010 Intel Corporation + * + * This code is the generic layer to export data records from platform + * level persistent storage via a file system. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _LINUX_PSTORE_H +#define _LINUX_PSTORE_H + +/* types */ +enum pstore_type_id { + PSTORE_TYPE_DMESG = 0, + PSTORE_TYPE_MCE = 1, + PSTORE_TYPE_UNKNOWN = 255 +}; + +struct pstore_info { + struct module *owner; + char *name; + struct mutex buf_mutex; /* serialize access to 'buf' */ + char *buf; + size_t bufsize; + size_t (*read)(u64 *id, enum pstore_type_id *type, + struct timespec *time); + u64 (*write)(enum pstore_type_id type, size_t size); + int (*erase)(u64 id); +}; + +#ifdef CONFIG_PSTORE +extern int pstore_register(struct pstore_info *); +extern int pstore_write(enum pstore_type_id type, char *buf, size_t size); +#else +static inline int +pstore_register(struct pstore_info *psi) +{ + return -ENODEV; +} +static inline int +pstore_write(enum pstore_type_id type, char *buf, size_t size) +{ + return -ENODEV; +} +#endif + +#endif /*_LINUX_PSTORE_H*/