From e3032b501385e83af5059b0a06f32c11a374788e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Jun 2019 13:24:49 -0700 Subject: [PATCH] Add disk I/O error monitoring Signed-off-by: Cong Wang --- .travis.yml | 2 +- Makefile.am | 5 +- configure.ac | 10 +++ ras-diskerror-handler.c | 133 ++++++++++++++++++++++++++++++++++++++++ ras-diskerror-handler.h | 29 +++++++++ ras-events.c | 16 +++++ ras-events.h | 1 + ras-record.c | 61 ++++++++++++++++++ ras-record.h | 16 +++++ ras-report.c | 79 ++++++++++++++++++++++++ ras-report.h | 2 + util/ras-mc-ctl.in | 42 ++++++++++++- 12 files changed, 393 insertions(+), 3 deletions(-) create mode 100644 ras-diskerror-handler.c create mode 100644 ras-diskerror-handler.h diff --git a/.travis.yml b/.travis.yml index fbf1444..846e8f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ before_install: - sudo apt-get install -y sqlite3 install: - autoreconf -vfi -- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-abrt-report --enable-hisi-ns-decode +- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode script: - make && sudo make install diff --git a/Makefile.am b/Makefile.am index 3d89672..843b538 100644 --- a/Makefile.am +++ b/Makefile.am @@ -45,6 +45,9 @@ endif if WITH_DEVLINK rasdaemon_SOURCES += ras-devlink-handler.c endif +if WITH_DISKERROR + rasdaemon_SOURCES += ras-diskerror-handler.c +endif if WITH_ABRT_REPORT rasdaemon_SOURCES += ras-report.c endif @@ -56,7 +59,7 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ - ras-devlink-handler.h + ras-devlink-handler.h ras-diskerror-handler.h # This rule can't be called with more than one Makefile job (like make -j8) # I can't figure out a way to fix that diff --git a/configure.ac b/configure.ac index fecff51..90a06b3 100644 --- a/configure.ac +++ b/configure.ac @@ -89,6 +89,15 @@ AS_IF([test "x$enable_devlink" = "xyes"], [ ]) AM_CONDITIONAL([WITH_DEVLINK], [test x$enable_devlink = xyes]) +AC_ARG_ENABLE([diskerror], + AS_HELP_STRING([--enable-diskerror], [enable disk I/O error events (currently experimental)])) + +AS_IF([test "x$enable_diskerror" = "xyes"], [ + AC_DEFINE(HAVE_DISKERROR,1,"have disk I/O errors collect") + AC_SUBST([WITH_DISKERROR]) +]) +AM_CONDITIONAL([WITH_DISKERROR], [test x$enable_diskerror = xyes]) + AC_ARG_ENABLE([abrt_report], AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) @@ -137,4 +146,5 @@ compile time options summary HIP07 SAS HW errors : $enable_hisi_ns_decode ARM events : $enable_arm DEVLINK : $enable_devlink + Disk I/O errors : $enable_diskerror EOF diff --git a/ras-diskerror-handler.c b/ras-diskerror-handler.c new file mode 100644 index 0000000..5277c47 --- /dev/null +++ b/ras-diskerror-handler.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2019 Cong Wang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "libtrace/kbuffer.h" +#include "ras-diskerror-handler.h" +#include "ras-record.h" +#include "ras-logger.h" +#include "ras-report.h" + + +static const struct { + int error; + const char *name; +} blk_errors[] = { + { -EOPNOTSUPP, "operation not supported error" }, + { -ETIMEDOUT, "timeout error" }, + { -ENOSPC, "critical space allocation error" }, + { -ENOLINK, "recoverable transport error" }, + { -EREMOTEIO, "critical target error" }, + { -EBADE, "critical nexus error" }, + { -ENODATA, "critical medium error" }, + { -EILSEQ, "protection error" }, + { -ENOMEM, "kernel resource error" }, + { -EBUSY, "device resource error" }, + { -EAGAIN, "nonblocking retry error" }, + { -EREMCHG, "dm internal retry error" }, + { -EIO, "I/O error" }, +}; + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) + +static const char *get_blk_error(int err) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(blk_errors); i++) + if (blk_errors[i].error == err) + return blk_errors[i].name; + return "unknown block error"; +} + +int ras_diskerror_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + unsigned long long val; + int len; + struct ras_events *ras = context; + time_t now; + struct tm *tm; + struct diskerror_event ev; + dev_t dev; + + if (ras->filters[DISKERROR_EVENT] && + pevent_filter_match(ras->filters[DISKERROR_EVENT], record) == FILTER_MATCH) + return 0; + /* + * Newer kernels (3.10-rc1 or upper) provide an uptime clock. + * On previous kernels, the way to properly generate an event would + * be to inject a fake one, measure its timestamp and diff it against + * gettimeofday. We won't do it here. Instead, let's use uptime, + * falling-back to the event report's time, if "uptime" clock is + * not available (legacy kernels). + */ + + if (ras->use_uptime) + now = record->ts/user_hz + ras->uptime_diff; + else + now = time(NULL); + + tm = localtime(&now); + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), + "%Y-%m-%d %H:%M:%S %z", tm); + trace_seq_printf(s, "%s ", ev.timestamp); + + if (pevent_get_field_val(s, event, "dev", record, &val, 1) < 0) + return -1; + dev = (dev_t)val; + asprintf(&ev.dev, "%u:%u", major(dev), minor(dev)); + + if (pevent_get_field_val(s, event, "sector", record, &val, 1) < 0) + return -1; + ev.sector = val; + + if (pevent_get_field_val(s, event, "nr_sector", record, &val, 1) < 0) + return -1; + ev.nr_sector = (unsigned int)val; + + if (pevent_get_field_val(s, event, "error", record, &val, 1) < 0) + return -1; + ev.error = get_blk_error((int)val); + + ev.rwbs = pevent_get_field_raw(s, event, "rwbs", record, &len, 1); + if (!ev.rwbs) + return -1; + + ev.cmd = pevent_get_field_raw(s, event, "cmd", record, &len, 1); + if (!ev.cmd) + return -1; + + /* Insert data into the SGBD */ +#ifdef HAVE_SQLITE3 + ras_store_diskerror_event(ras, &ev); +#endif + +#ifdef HAVE_ABRT_REPORT + /* Report event to ABRT */ + ras_report_diskerror_event(ras, &ev); +#endif + free(ev.dev); + return 0; +} diff --git a/ras-diskerror-handler.h b/ras-diskerror-handler.h new file mode 100644 index 0000000..002f1c4 --- /dev/null +++ b/ras-diskerror-handler.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2019 Cong Wang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef __RAS_DISKERROR_HANDLER_H +#define __RAS_DISKERROR_HANDLER_H + +#include "ras-events.h" +#include "libtrace/event-parse.h" + +int ras_diskerror_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context); + +#endif diff --git a/ras-events.c b/ras-events.c index d7286ed..f703dbc 100644 --- a/ras-events.c +++ b/ras-events.c @@ -34,6 +34,7 @@ #include "ras-mce-handler.h" #include "ras-extlog-handler.h" #include "ras-devlink-handler.h" +#include "ras-diskerror-handler.h" #include "ras-record.h" #include "ras-logger.h" @@ -223,6 +224,10 @@ int toggle_ras_mc_event(int enable) rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable); #endif +#ifdef HAVE_DISKERROR + rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); +#endif + free_ras: free(ras); return rc; @@ -784,6 +789,17 @@ int handle_ras_events(int record_events) "devlink", "devlink_health_report"); #endif +#ifdef HAVE_DISKERROR + rc = add_event_handler(ras, pevent, page_size, "block", + "block_rq_complete", ras_diskerror_event_handler, + "block/block_rq_complete:error==0", DISKERROR_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "block", "block_rq_complete"); +#endif + if (!num_events) { log(ALL, LOG_INFO, "Failed to trace all supported RAS events. Aborting.\n"); diff --git a/ras-events.h b/ras-events.h index 0234f32..f028741 100644 --- a/ras-events.h +++ b/ras-events.h @@ -37,6 +37,7 @@ enum { ARM_EVENT, EXTLOG_EVENT, DEVLINK_EVENT, + DISKERROR_EVENT, NR_EVENTS }; diff --git a/ras-record.c b/ras-record.c index b212607..30c063b 100644 --- a/ras-record.c +++ b/ras-record.c @@ -445,6 +445,60 @@ int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) } #endif +/* + * Table and functions to handle block:block_rq_complete + */ + +#ifdef HAVE_DISKERROR +static const struct db_fields diskerror_event_fields[] = { + { .name="id", .type="INTEGER PRIMARY KEY" }, + { .name="timestamp", .type="TEXT" }, + { .name="dev", .type="TEXT" }, + { .name="sector", .type="INTEGER" }, + { .name="nr_sector", .type="INTEGER" }, + { .name="error", .type="TEXT" }, + { .name="rwbs", .type="TEXT" }, + { .name="cmd", .type="TEXT" }, +}; + +static const struct db_table_descriptor diskerror_event_tab = { + .name = "disk_errors", + .fields = diskerror_event_fields, + .num_fields = ARRAY_SIZE(diskerror_event_fields), +}; + +int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) +{ + int rc; + struct sqlite3_priv *priv = ras->db_priv; + + if (!priv || !priv->stmt_diskerror_event) + return 0; + log(TERM, LOG_INFO, "diskerror_eventstore: %p\n", priv->stmt_diskerror_event); + + sqlite3_bind_text(priv->stmt_diskerror_event, 1, ev->timestamp, -1, NULL); + sqlite3_bind_text(priv->stmt_diskerror_event, 2, ev->dev, -1, NULL); + sqlite3_bind_int64(priv->stmt_diskerror_event, 3, ev->sector); + sqlite3_bind_int(priv->stmt_diskerror_event, 4, ev->nr_sector); + sqlite3_bind_text(priv->stmt_diskerror_event, 5, ev->error, -1, NULL); + sqlite3_bind_text(priv->stmt_diskerror_event, 6, ev->rwbs, -1, NULL); + sqlite3_bind_text(priv->stmt_diskerror_event, 7, ev->cmd, -1, NULL); + + rc = sqlite3_step(priv->stmt_diskerror_event); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, + "Failed to do diskerror_event step on sqlite: error = %d\n", rc); + rc = sqlite3_reset(priv->stmt_diskerror_event); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, + "Failed reset diskerror_event on sqlite: error = %d\n", + rc); + log(TERM, LOG_INFO, "register inserted at db\n"); + + return rc; +} +#endif + /* * Generic code */ @@ -631,6 +685,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) &devlink_event_tab); #endif +#ifdef HAVE_DISKERROR + rc = ras_mc_create_table(priv, &diskerror_event_tab); + if (rc == SQLITE_OK) + rc = ras_mc_prepare_stmt(priv, &priv->stmt_diskerror_event, + &diskerror_event_tab); +#endif + ras->db_priv = priv; return 0; } diff --git a/ras-record.h b/ras-record.h index 432a571..5311c67 100644 --- a/ras-record.h +++ b/ras-record.h @@ -84,6 +84,16 @@ struct devlink_event { char *msg; }; +struct diskerror_event { + char timestamp[64]; + char *dev; + unsigned long long sector; + unsigned int nr_sector; + const char *error; + const char *rwbs; + const char *cmd; +}; + struct ras_mc_event; struct ras_aer_event; struct ras_extlog_event; @@ -91,6 +101,7 @@ struct ras_non_standard_event; struct ras_arm_event; struct mce_event; struct devlink_event; +struct diskerror_event; #ifdef HAVE_SQLITE3 @@ -117,6 +128,9 @@ struct sqlite3_priv { #ifdef HAVE_DEVLINK sqlite3_stmt *stmt_devlink_event; #endif +#ifdef HAVE_DISKERROR + sqlite3_stmt *stmt_diskerror_event; +#endif }; struct db_fields { @@ -140,6 +154,7 @@ int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev); int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev); +int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); #else static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; @@ -150,6 +165,7 @@ static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; +static inline int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; #endif diff --git a/ras-report.c b/ras-report.c index 785a302..661da84 100644 --- a/ras-report.c +++ b/ras-report.c @@ -281,6 +281,33 @@ static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev){ return 0; } +static int set_diskerror_event_backtrace(char *buf, struct diskerror_event *ev) { + char bt_buf[MAX_BACKTRACE_SIZE]; + + if(!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ + "timestamp=%s\n" \ + "dev=%s\n" \ + "sector=%llu\n" \ + "nr_sector=%u\n" \ + "error=%s\n" \ + "rwbs=%s\n" \ + "cmd=%s\n", \ + ev->timestamp, \ + ev->dev, \ + ev->sector, \ + ev->nr_sector, \ + ev->error, \ + ev->rwbs, \ + ev->cmd); + + strcat(buf, bt_buf); + + return 0; +} + static int commit_report_backtrace(int sockfd, int type, void *ev){ char buf[MAX_BACKTRACE_SIZE]; char *pbuf = buf; @@ -312,6 +339,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ case DEVLINK_EVENT: rc = set_devlink_event_backtrace(buf, (struct devlink_event *)ev); break; + case DISKERROR_EVENT: + rc = set_diskerror_event_backtrace(buf, (struct diskerror_event *)ev); + break; default: return -1; } @@ -628,3 +658,52 @@ devlink_fail: return -1; } } + +int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev){ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int done = 0; + int rc = -1; + + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); + if(sockfd < 0){ + return -1; + } + + rc = commit_report_basic(sockfd); + if(rc < 0){ + goto diskerror_fail; + } + + rc = commit_report_backtrace(sockfd, DISKERROR_EVENT, ev); + if(rc < 0){ + goto diskerror_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-diskerror"); + rc = write(sockfd, buf, strlen(buf) + 1); + if(rc < strlen(buf) + 1){ + goto diskerror_fail; + } + + sprintf(buf, "REASON=%s", "disk I/O error"); + rc = write(sockfd, buf, strlen(buf) + 1); + if(rc < strlen(buf) + 1){ + goto diskerror_fail; + } + + done = 1; + +diskerror_fail: + if(sockfd > 0){ + close(sockfd); + } + + if(done){ + return 0; + }else{ + return -1; + } +} diff --git a/ras-report.h b/ras-report.h index 2addccb..1d911de 100644 --- a/ras-report.h +++ b/ras-report.h @@ -37,6 +37,7 @@ int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev); int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev); +int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); #else @@ -46,6 +47,7 @@ static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; +static inline int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; #endif diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in index 3adc2f9..bfa9524 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in @@ -1122,7 +1122,7 @@ sub summary my ($query, $query_handle, $out); my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); my ($etype, $severity, $etype_string, $severity_string); - my $dev_name; + my ($dev_name, $dev); my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); @@ -1192,6 +1192,22 @@ sub summary } $query_handle->finish; + # Disk errors + $query = "select dev, count(*) from disk_errors group by dev"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($dev, $count)); + $out = ""; + while($query_handle->fetch()) { + $out .= "\t$dev has $count errors\n"; + } + if ($out ne "") { + print "Disk errors summary:\n$out"; + } else { + print "No disk errors.\n"; + } + $query_handle->finish; + # MCE mce_record errors $query = "select error_msg, count(*) from mce_record group by error_msg"; $query_handle = $dbh->prepare($query); @@ -1218,6 +1234,7 @@ sub errors my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); my ($bus_name, $dev_name, $driver_name, $reporter_name); + my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); @@ -1304,6 +1321,29 @@ sub errors } $query_handle->finish; + # Disk errors + $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd)); + $out = ""; + while($query_handle->fetch()) { + $out .= "$id $timestamp error: "; + $out .= "dev=$dev, "; + $out .= "sector=$sector, "; + $out .= "nr_sector=$nr_sector, "; + $out .= "error='$error', "; + $out .= "rwbs='$rwbs', "; + $out .= "cmd='$cmd', "; + $out .= "\n"; + } + if ($out ne "") { + print "Disk errors\n$out\n"; + } else { + print "No disk errors.\n\n"; + } + $query_handle->finish; + # MCE mce_record errors $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; $query_handle = $dbh->prepare($query); -- 2.50.1