www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
IB/rds_rdma: unloading of ofed stack causes page fault panic
author: Rama Nichanamatlu <rama.nichanamatlu@oracle.com>
Thu, 11 Jun 2015 17:43:54 +0000 (10:43 -0700)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Fri, 4 Sep 2015 02:08:04 +0000 (19:08 -0700)
This issue surfaced at the tail end of the OFED functional automated test
suite while unloading the OFED modules, resulting in the following stack trace:
 BUG: unable to handle kernel paging request at ffffffffa0abd1a0
 IP: [<ffffffffa0abd1a0>] 0xffffffffa0abd1a0

 Modules linked in: rds(-) ib_ipoib ... dm_mod [last unloaded: rds_rdma]

 Workqueue: krdsd 0xffffffffa0abd1a0
 task: ffff880670ac8df0 ti: ffff880666654000 task.ti: ffff880666654000
 RIP: 0010:[<ffffffffa0abd1a0>]  [<ffffffffa0abd1a0>] 0xffffffffa0abd1a0
 RSP: 0018:ffff880666657de0  EFLAGS: 00010286
 RAX: 0000000000000600 RBX: ffff880664a03380 RCX: dead000000200200
 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff880664a03380
 RBP: ffff880666657e38 R08: ffff880664a03388 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880674279c80
 R13: ffff880675169800 R14: ffff880671a5dd00 R15: 0000000000000000
 FS:  0000000000000000(0000) GS:ffff88067fc00000(0000) GS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: ffffffffa0abd1a0 CR3: 0000000001a56000 CR4: 00000000000007e0
 Stack:
  ffffffff810962d6 000000000000000b ffff880664a03388 ffff880675169800
  ffff880671a5dd15 ffff880674279cb0 ffff880674279c80 ffff880675169800
  ffff880675169bc0 ffff880674279cb0 ffff880675169818 ffff880666657eb8
 Call Trace:
  [<ffffffff810962d6>] ? process_one_work+0x146/0x450

The root cause of the panic is a failure to cancel the still-active delayed
work request for the active bonding initial failover work.

The fix is to cancel the active bonding initial failover delayed work, if it
is still active, at module unload.

Orabug: 20861212

Signed-off-by: Rama Nichanamatlu <rama.nichanamatlu@oracle.com>
Acked-by: Mukesh Kacker <mukesh.kacker@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/rdma_transport.c

index 7bdae390143799429d2cd4edadafc71319452d6b..5dc821faf81e960445c7aa999a272a9060bdb2bc 100644 (file)
@@ -145,6 +145,10 @@ static u8       excl_ips_cnt = 0;
 static int ip_config_init_phase_flag; /* = 0 */
 static int initial_failovers_iterations; /* = 0 */
 
+static void rds_ib_initial_failovers(struct work_struct *workarg);
+DECLARE_DELAYED_WORK(riif_dlywork, rds_ib_initial_failovers);
+static int timeout_until_initial_failovers;
+
 /*
  * rds_detected_linklayer_up
  *
@@ -1536,11 +1540,8 @@ static void rds_ib_event_handler(struct ib_event_handler *handler,
 }
 
 static void
-rds_ib_do_initial_failovers(struct work_struct *workarg)
+rds_ib_do_initial_failovers(void)
 {
-       struct rds_ib_initial_failovers_work *riif_work =
-               container_of(workarg, struct rds_ib_initial_failovers_work,
-                            dlywork.work);
        unsigned int ii;
        unsigned int ports_deactivated = 0;
        int ret = 0;
@@ -1611,15 +1612,11 @@ rds_ib_do_initial_failovers(struct work_struct *workarg)
        }
 
        ip_config_init_phase_flag = 0; /* done with initial phase! */
-       kfree(riif_work);
 }
 
 static void
 rds_ib_initial_failovers(struct work_struct *workarg)
 {
-       struct rds_ib_initial_failovers_work *riif_work =
-               container_of(workarg, struct rds_ib_initial_failovers_work,
-                            dlywork.work);
 
        if (rds_ib_sysctl_trigger_active_bonding == 0) {
                /*
@@ -1631,12 +1628,11 @@ rds_ib_initial_failovers(struct work_struct *workarg)
                 * If trigger not set, defer, unless we have
                 * reached a max timeout!
                 */
-               if (riif_work->timeout > 0) {
-                       INIT_DELAYED_WORK(&riif_work->dlywork,
-                                         rds_ib_initial_failovers);
-                       riif_work->timeout -= msecs_to_jiffies(100);
+               if (timeout_until_initial_failovers > 0) {
+                       timeout_until_initial_failovers -=
+                         msecs_to_jiffies(100);
                        queue_delayed_work(rds_wq,
-                                          &riif_work->dlywork,
+                                          &riif_dlywork,
                                           msecs_to_jiffies(100));
                        initial_failovers_iterations++;
                        return;
@@ -1656,7 +1652,7 @@ rds_ib_initial_failovers(struct work_struct *workarg)
                       "failovers(itercount %d)\n",
                       initial_failovers_iterations);
        }
-       rds_ib_do_initial_failovers(workarg);
+       rds_ib_do_initial_failovers();
 }
 
 static void rds_ib_dump_ip_config(void)
@@ -1750,7 +1746,6 @@ static void
 sched_initial_failovers(unsigned int tot_devs,
                        unsigned int tot_ibdevs)
 {
-       struct rds_ib_initial_failovers_work *riif_work;
        unsigned int trigger_delay_max_jiffies;
        unsigned int trigger_delay_min_jiffies;
 
@@ -1812,28 +1807,16 @@ sched_initial_failovers(unsigned int tot_devs,
                       rds_ib_active_bonding_trigger_delay_max_msecs);
        }
 
-       riif_work = kzalloc(sizeof(struct rds_ib_initial_failovers_work),
-                           GFP_KERNEL);
-       if (riif_work == NULL) {
-               printk(KERN_ERR
-                      "RDS/IB: failed to allocate initial failovers work");
-               ip_config_init_phase_flag = 0;
-               return;
-       }
-
        trigger_delay_max_jiffies =
                msecs_to_jiffies(rds_ib_active_bonding_trigger_delay_max_msecs);
-       riif_work->timeout = trigger_delay_max_jiffies;
 
        trigger_delay_min_jiffies =
                msecs_to_jiffies(rds_ib_active_bonding_trigger_delay_min_msecs);
 
-       INIT_DELAYED_WORK(&riif_work->dlywork, rds_ib_initial_failovers);
-
-       riif_work->timeout = trigger_delay_max_jiffies;
+       timeout_until_initial_failovers = trigger_delay_max_jiffies;
 
        queue_delayed_work(rds_wq,
-                          &riif_work->dlywork,
+                          &riif_dlywork,
                           trigger_delay_min_jiffies);
 }
 
index 50bba68c704b0083a73211424e22ab129ad6747d..e8dd067ac1f59a7fed7938adec9a7d40874b09c5 100644 (file)
@@ -395,11 +395,6 @@ struct rds_ib_port_ud_work {
        int                             event_type;
 };
 
-struct rds_ib_initial_failovers_work {
-       struct delayed_work            dlywork;
-       int                            timeout;
-};
-
 struct rds_ib_conn_drop_work {
        struct delayed_work             work;
        struct rds_connection          *conn;
@@ -566,6 +561,8 @@ extern struct list_head ib_nodev_conns;
 
 extern struct socket *rds_ib_inet_socket;
 
+extern struct delayed_work riif_dlywork;
+
 /* ib_cm.c */
 int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
 void rds_ib_conn_free(void *arg);
index b48b5797e2901634e47e6f21c2e554a2fad57054..b826fcc9c709e48877f3ddd7b16da4167a836a75 100644 (file)
@@ -386,6 +386,8 @@ void rds_rdma_exit(void)
 {
        /* stop listening first to ensure no new connections are attempted */
        rds_rdma_listen_stop();
+       /* cancel initial ib failover work if still active*/
+       cancel_delayed_work_sync(&riif_dlywork);
        rds_ib_exit();
        rds_iw_exit();
 }