From fb3c490dd1b41a89fabc86b7800bdfb601b4c5d7 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Tue, 7 Mar 2017 11:35:44 -0800 Subject: [PATCH] fnic: Adding debug IO, Abort latency counter and check condition count to fnic stats Orabug: 25638880 The IO and Abort latency counter count the time taken to complete the IO and abort command into broad buckets. It is not intend for performance measurement, just a debug stat. current_max_io_time tries to keep tarck of the maximun time an IO has taken to complete if it > 30sec, it just another debug stat. Just a simple counter of number of check conditions encountered on that host. Signed-off-by: Satish Kharat Signed-off-by: Somasundaram Krishnasamy Reviewed-by: Brian Maly --- drivers/scsi/fnic/fnic_scsi.c | 56 ++++++++++++++++++++++++++++++++++ drivers/scsi/fnic/fnic_stats.h | 16 ++++++++++ drivers/scsi/fnic/fnic_trace.c | 49 ++++++++++++++++++++++++++--- 3 files changed, 117 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 13a5e8269ca7..710820acd092 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -807,6 +807,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, spinlock_t *io_lock; u64 cmd_trace; unsigned long start_time; + unsigned long io_duration_time; /* Decode the cmpl description to get the io_req id */ fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); @@ -899,6 +900,9 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER) xfer_len -= icmnd_cmpl->residual; + if (icmnd_cmpl->scsi_status == SAM_STAT_CHECK_CONDITION) + atomic64_inc(&fnic_stats->misc_stats.check_condition); + if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL) atomic64_inc(&fnic_stats->misc_stats.queue_fulls); break; @@ -998,6 +1002,28 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, else atomic64_inc(&fnic_stats->io_stats.io_completions); + + io_duration_time = jiffies_to_msecs(jiffies) - jiffies_to_msecs(io_req->start_time); + + if(io_duration_time <= 10) + atomic64_inc(&fnic_stats->io_stats.io_btw_0_to_10_msec); + else if(io_duration_time <= 100) + atomic64_inc(&fnic_stats->io_stats.io_btw_10_to_100_msec); + else if(io_duration_time <= 500) + atomic64_inc(&fnic_stats->io_stats.io_btw_100_to_500_msec); + else if(io_duration_time <= 5000) + atomic64_inc(&fnic_stats->io_stats.io_btw_500_to_5000_msec); + else if(io_duration_time <= 10000) + atomic64_inc(&fnic_stats->io_stats.io_btw_5000_to_10000_msec); + else if(io_duration_time <= 30000) + atomic64_inc(&fnic_stats->io_stats.io_btw_10000_to_30000_msec); + else { + atomic64_inc(&fnic_stats->io_stats.io_greater_than_30000_msec); + + if(io_duration_time > atomic64_read(&fnic_stats->io_stats.current_max_io_time)) + atomic64_set(&fnic_stats->io_stats.current_max_io_time, io_duration_time); + } + /* Call SCSI completion function to complete the IO */ if (sc->scsi_done) sc->scsi_done(sc); @@ -1162,6 +1188,11 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); sc->scsi_done(sc); + atomic64_dec(&fnic_stats->io_stats.active_ios); + if (atomic64_read(&fnic->io_cmpl_skip)) + atomic64_dec(&fnic->io_cmpl_skip); + else + atomic64_inc(&fnic_stats->io_stats.io_completions); } } @@ -1774,6 +1805,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) struct terminate_stats *term_stats; enum fnic_ioreq_state old_ioreq_state; int tag; + unsigned long abt_issued_time; DECLARE_COMPLETION_ONSTACK(tm_done); /* Wait for rport to unblock */ @@ -1825,6 +1857,25 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) spin_unlock_irqrestore(io_lock, flags); goto wait_pending; } + + abt_issued_time = jiffies_to_msecs(jiffies) - jiffies_to_msecs(io_req->start_time); + if (abt_issued_time <= 6000) + atomic64_inc(&abts_stats->abort_issued_btw_0_to_6_sec); + else if (abt_issued_time > 6000 && abt_issued_time <= 20000) + atomic64_inc(&abts_stats->abort_issued_btw_6_to_20_sec); + else if (abt_issued_time > 20000 && abt_issued_time <= 30000) + atomic64_inc(&abts_stats->abort_issued_btw_20_to_30_sec); + else if (abt_issued_time > 30000 && abt_issued_time <= 40000) + atomic64_inc(&abts_stats->abort_issued_btw_30_to_40_sec); + else if (abt_issued_time > 40000 && abt_issued_time <= 50000) + atomic64_inc(&abts_stats->abort_issued_btw_40_to_50_sec); + else if (abt_issued_time > 50000 && abt_issued_time <= 60000) + atomic64_inc(&abts_stats->abort_issued_btw_50_to_60_sec); + else + atomic64_inc(&abts_stats->abort_issued_greater_than_60_sec); + + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "CBD Opcode: %02x Abort issued time: %lu msec\n", sc->cmnd[0], abt_issued_time); /* * Command is still pending, need to abort it * If the firmware completes the command after this point, @@ -1951,6 +2002,11 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) /* Call SCSI completion function to complete the IO */ sc->result = (DID_ABORT << 16); sc->scsi_done(sc); + atomic64_dec(&fnic_stats->io_stats.active_ios); + if (atomic64_read(&fnic->io_cmpl_skip)) + atomic64_dec(&fnic->io_cmpl_skip); + else + atomic64_inc(&fnic_stats->io_stats.io_completions); } fnic_abort_cmd_end: diff --git a/drivers/scsi/fnic/fnic_stats.h b/drivers/scsi/fnic/fnic_stats.h index 540cceb843cd..88c73cccb015 100644 --- a/drivers/scsi/fnic/fnic_stats.h +++ b/drivers/scsi/fnic/fnic_stats.h @@ -26,6 +26,14 @@ struct io_path_stats { atomic64_t sc_null; atomic64_t io_not_found; atomic64_t num_ios; + atomic64_t io_btw_0_to_10_msec; + atomic64_t io_btw_10_to_100_msec; + atomic64_t io_btw_100_to_500_msec; + atomic64_t io_btw_500_to_5000_msec; + atomic64_t io_btw_5000_to_10000_msec; + atomic64_t io_btw_10000_to_30000_msec; + atomic64_t io_greater_than_30000_msec; + atomic64_t current_max_io_time; }; struct abort_stats { @@ -34,6 +42,13 @@ struct abort_stats { atomic64_t abort_drv_timeouts; atomic64_t abort_fw_timeouts; atomic64_t abort_io_not_found; + atomic64_t abort_issued_btw_0_to_6_sec; + atomic64_t abort_issued_btw_6_to_20_sec; + atomic64_t abort_issued_btw_20_to_30_sec; + atomic64_t abort_issued_btw_30_to_40_sec; + atomic64_t abort_issued_btw_40_to_50_sec; + atomic64_t abort_issued_btw_50_to_60_sec; + atomic64_t abort_issued_greater_than_60_sec; }; struct terminate_stats { @@ -88,6 +103,7 @@ struct misc_stats { atomic64_t devrst_cpwq_alloc_failures; atomic64_t io_cpwq_alloc_failures; atomic64_t no_icmnd_itmf_cmpls; + atomic64_t check_condition; atomic64_t queue_fulls; atomic64_t rport_not_ready; atomic64_t frame_errors; diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index 65a9bde26974..df4aa89757d6 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -228,7 +228,16 @@ int fnic_get_stats_data(struct stats_debug_info *debug, "Number of IO Failures: %lld\nNumber of IO NOT Found: %lld\n" "Number of Memory alloc Failures: %lld\n" "Number of IOREQ Null: %lld\n" - "Number of SCSI cmd pointer Null: %lld\n", + "Number of SCSI cmd pointer Null: %lld\n" + + "\nIO completion times: \n" + " < 10 ms : %lld\n" + " 10 ms - 100 ms : %lld\n" + " 100 ms - 500 ms : %lld\n" + " 500 ms - 5 sec: %lld\n" + " 5 sec - 10 sec: %lld\n" + " 10 sec - 30 sec: %lld\n" + " > 30 sec: %lld\n", (u64)atomic64_read(&stats->io_stats.active_ios), (u64)atomic64_read(&stats->io_stats.max_active_ios), (u64)atomic64_read(&stats->io_stats.num_ios), @@ -237,28 +246,58 @@ int fnic_get_stats_data(struct stats_debug_info *debug, (u64)atomic64_read(&stats->io_stats.io_not_found), (u64)atomic64_read(&stats->io_stats.alloc_failures), (u64)atomic64_read(&stats->io_stats.ioreq_null), - (u64)atomic64_read(&stats->io_stats.sc_null)); + (u64)atomic64_read(&stats->io_stats.sc_null), + (u64)atomic64_read(&stats->io_stats.io_btw_0_to_10_msec), + (u64)atomic64_read(&stats->io_stats.io_btw_10_to_100_msec), + (u64)atomic64_read(&stats->io_stats.io_btw_100_to_500_msec), + (u64)atomic64_read(&stats->io_stats.io_btw_500_to_5000_msec), + (u64)atomic64_read(&stats->io_stats.io_btw_5000_to_10000_msec), + (u64)atomic64_read(&stats->io_stats.io_btw_10000_to_30000_msec), + (u64)atomic64_read(&stats->io_stats.io_greater_than_30000_msec)); + + len += snprintf(debug->debug_buffer + len, buf_size - len, + "\nCurrent Max IO time : %lld\n", + (u64)atomic64_read(&stats->io_stats.current_max_io_time)); len += snprintf(debug->debug_buffer + len, buf_size - len, "\n------------------------------------------\n" "\t\tAbort Statistics\n" "------------------------------------------\n"); + len += snprintf(debug->debug_buffer + len, buf_size - len, "Number of Aborts: %lld\n" "Number of Abort Failures: %lld\n" "Number of Abort Driver Timeouts: %lld\n" "Number of Abort FW Timeouts: %lld\n" - "Number of Abort IO NOT Found: %lld\n", + "Number of Abort IO NOT Found: %lld\n" + + "Abord issued times: \n" + " < 6 sec : %lld\n" + " 6 sec - 20 sec : %lld\n" + " 20 sec - 30 sec : %lld\n" + " 30 sec - 40 sec : %lld\n" + " 40 sec - 50 sec : %lld\n" + " 50 sec - 60 sec : %lld\n" + " > 60 sec: %lld\n", + (u64)atomic64_read(&stats->abts_stats.aborts), (u64)atomic64_read(&stats->abts_stats.abort_failures), (u64)atomic64_read(&stats->abts_stats.abort_drv_timeouts), (u64)atomic64_read(&stats->abts_stats.abort_fw_timeouts), - (u64)atomic64_read(&stats->abts_stats.abort_io_not_found)); + (u64)atomic64_read(&stats->abts_stats.abort_io_not_found), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_0_to_6_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_6_to_20_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_20_to_30_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_30_to_40_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_40_to_50_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_btw_50_to_60_sec), + (u64)atomic64_read(&stats->abts_stats.abort_issued_greater_than_60_sec)); len += snprintf(debug->debug_buffer + len, buf_size - len, "\n------------------------------------------\n" "\t\tTerminate Statistics\n" "------------------------------------------\n"); + len += snprintf(debug->debug_buffer + len, buf_size - len, "Number of Terminates: %lld\n" "Maximum Terminates: %lld\n" @@ -356,6 +395,7 @@ int fnic_get_stats_data(struct stats_debug_info *debug, "Number of Copy WQ Alloc Failures for Device Reset: %lld\n" "Number of Copy WQ Alloc Failures for IOs: %lld\n" "Number of no icmnd itmf Completions: %lld\n" + "Number of Check Conditions: %lld\n" "Number of QUEUE Fulls: %lld\n" "Number of rport not ready: %lld\n" "Number of receive frame errors: %lld\n", @@ -376,6 +416,7 @@ int fnic_get_stats_data(struct stats_debug_info *debug, &stats->misc_stats.devrst_cpwq_alloc_failures), (u64)atomic64_read(&stats->misc_stats.io_cpwq_alloc_failures), (u64)atomic64_read(&stats->misc_stats.no_icmnd_itmf_cmpls), + (u64)atomic64_read(&stats->misc_stats.check_condition), (u64)atomic64_read(&stats->misc_stats.queue_fulls), (u64)atomic64_read(&stats->misc_stats.rport_not_ready), (u64)atomic64_read(&stats->misc_stats.frame_errors)); -- 2.50.1