From 969bb4e57b68e5fe7f1155762f631aee7b3dc4bb Mon Sep 17 00:00:00 2001 From: Rama Nichanamatlu Date: Thu, 11 Jun 2015 10:43:54 -0700 Subject: [PATCH] IB/rds_rdma: unloading of ofed stack causes page fault panic This issue surfaced at the tail end of the OFED functional automated test suite while unloading the OFED modules, resulting in the following stack trace: BUG: unable to handle kernel paging request at ffffffffa0abd1a0 IP: [] 0xffffffffa0abd1a0 Modules linked in: rds(-) ib_ipoib ... dm_mod [last unloaded: rds_rdma] Workqueue: krdsd 0xffffffffa0abd1a0 task: ffff880670ac8df0 ti: ffff880666654000 task.ti: ffff880666654000 RIP: 0010:[] [] 0xffffffffa0abd1a0 RSP: 0018:ffff880666657de0 EFLAGS: 00010286 RAX: 0000000000000600 RBX: ffff880664a03380 RCX: dead000000200200 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff880664a03380 RBP: ffff880666657e38 R08: ffff880664a03388 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880674279c80 R13: ffff880675169800 R14: ffff880671a5dd00 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88067fc00000(0000) GS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffa0abd1a0 CR3: 0000000001a56000 CR4: 00000000000007e0 Stack: ffffffff810962d6 000000000000000b ffff880664a03388 ffff880675169800 ffff880671a5dd15 ffff880674279cb0 ffff880674279c80 ffff880675169800 ffff880675169bc0 ffff880674279cb0 ffff880675169818 ffff880666657eb8 Call Trace: [] ? process_one_work+0x146/0x450 The root cause of the panic is that the delayed work queued for the active bonding initial failovers is not cancelled at module unload; as the trace shows, the krdsd workqueue then tries to run a work function whose address is no longer mapped once rds_rdma has been unloaded. The fix is to cancel the active bonding initial failovers delayed work at module unload if it is still pending. 
Orabug: 20861212 Signed-off-by: Rama Nichanamatlu Acked-by: Mukesh Kacker --- net/rds/ib.c | 41 ++++++++++++---------------------------- net/rds/ib.h | 7 ++----- net/rds/rdma_transport.c | 2 ++ 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 7bdae3901437..5dc821faf81e 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -145,6 +145,10 @@ static u8 excl_ips_cnt = 0; static int ip_config_init_phase_flag; /* = 0 */ static int initial_failovers_iterations; /* = 0 */ +static void rds_ib_initial_failovers(struct work_struct *workarg); +DECLARE_DELAYED_WORK(riif_dlywork, rds_ib_initial_failovers); +static int timeout_until_initial_failovers; + /* * rds_detected_linklayer_up * @@ -1536,11 +1540,8 @@ static void rds_ib_event_handler(struct ib_event_handler *handler, } static void -rds_ib_do_initial_failovers(struct work_struct *workarg) +rds_ib_do_initial_failovers(void) { - struct rds_ib_initial_failovers_work *riif_work = - container_of(workarg, struct rds_ib_initial_failovers_work, - dlywork.work); unsigned int ii; unsigned int ports_deactivated = 0; int ret = 0; @@ -1611,15 +1612,11 @@ rds_ib_do_initial_failovers(struct work_struct *workarg) } ip_config_init_phase_flag = 0; /* done with initial phase! */ - kfree(riif_work); } static void rds_ib_initial_failovers(struct work_struct *workarg) { - struct rds_ib_initial_failovers_work *riif_work = - container_of(workarg, struct rds_ib_initial_failovers_work, - dlywork.work); if (rds_ib_sysctl_trigger_active_bonding == 0) { /* @@ -1631,12 +1628,11 @@ rds_ib_initial_failovers(struct work_struct *workarg) * If trigger not set, defer, unless we have * reached a max timeout! 
*/ - if (riif_work->timeout > 0) { - INIT_DELAYED_WORK(&riif_work->dlywork, - rds_ib_initial_failovers); - riif_work->timeout -= msecs_to_jiffies(100); + if (timeout_until_initial_failovers > 0) { + timeout_until_initial_failovers -= + msecs_to_jiffies(100); queue_delayed_work(rds_wq, - &riif_work->dlywork, + &riif_dlywork, msecs_to_jiffies(100)); initial_failovers_iterations++; return; @@ -1656,7 +1652,7 @@ rds_ib_initial_failovers(struct work_struct *workarg) "failovers(itercount %d)\n", initial_failovers_iterations); } - rds_ib_do_initial_failovers(workarg); + rds_ib_do_initial_failovers(); } static void rds_ib_dump_ip_config(void) @@ -1750,7 +1746,6 @@ static void sched_initial_failovers(unsigned int tot_devs, unsigned int tot_ibdevs) { - struct rds_ib_initial_failovers_work *riif_work; unsigned int trigger_delay_max_jiffies; unsigned int trigger_delay_min_jiffies; @@ -1812,28 +1807,16 @@ sched_initial_failovers(unsigned int tot_devs, rds_ib_active_bonding_trigger_delay_max_msecs); } - riif_work = kzalloc(sizeof(struct rds_ib_initial_failovers_work), - GFP_KERNEL); - if (riif_work == NULL) { - printk(KERN_ERR - "RDS/IB: failed to allocate initial failovers work"); - ip_config_init_phase_flag = 0; - return; - } - trigger_delay_max_jiffies = msecs_to_jiffies(rds_ib_active_bonding_trigger_delay_max_msecs); - riif_work->timeout = trigger_delay_max_jiffies; trigger_delay_min_jiffies = msecs_to_jiffies(rds_ib_active_bonding_trigger_delay_min_msecs); - INIT_DELAYED_WORK(&riif_work->dlywork, rds_ib_initial_failovers); - - riif_work->timeout = trigger_delay_max_jiffies; + timeout_until_initial_failovers = trigger_delay_max_jiffies; queue_delayed_work(rds_wq, - &riif_work->dlywork, + &riif_dlywork, trigger_delay_min_jiffies); } diff --git a/net/rds/ib.h b/net/rds/ib.h index 50bba68c704b..e8dd067ac1f5 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -395,11 +395,6 @@ struct rds_ib_port_ud_work { int event_type; }; -struct rds_ib_initial_failovers_work { - struct 
delayed_work dlywork; - int timeout; -}; - struct rds_ib_conn_drop_work { struct delayed_work work; struct rds_connection *conn; @@ -566,6 +561,8 @@ extern struct list_head ib_nodev_conns; extern struct socket *rds_ib_inet_socket; +extern struct delayed_work riif_dlywork; + /* ib_cm.c */ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b48b5797e290..b826fcc9c709 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -386,6 +386,8 @@ void rds_rdma_exit(void) { /* stop listening first to ensure no new connections are attempted */ rds_rdma_listen_stop(); + /* cancel initial ib failover work if still active*/ + cancel_delayed_work_sync(&riif_dlywork); rds_ib_exit(); rds_iw_exit(); } -- 2.50.1