There is a race between reset and the transmit paths that can lead to
ibmvnic_xmit() accessing an scrq after it has been freed in the reset
path. It can result in a crash like:
	Kernel attempted to read user page (0) - exploit attempt? (uid: 0)
	BUG: Kernel NULL pointer dereference on read at 0x00000000
	Faulting instruction address: 0xc0080000016189f8
	Oops: Kernel access of bad area, sig: 11 [#1]
	...
	NIP [
c0080000016189f8] ibmvnic_xmit+0x60/0xb60 [ibmvnic]
	LR [
c000000000c0046c] dev_hard_start_xmit+0x11c/0x280
	Call Trace:
	[
c008000001618f08] ibmvnic_xmit+0x570/0xb60 [ibmvnic] (unreliable)
	[
c000000000c0046c] dev_hard_start_xmit+0x11c/0x280
	[
c000000000c9cfcc] sch_direct_xmit+0xec/0x330
	[
c000000000bfe640] __dev_xmit_skb+0x3a0/0x9d0
	[
c000000000c00ad4] __dev_queue_xmit+0x394/0x730
	[
c008000002db813c] __bond_start_xmit+0x254/0x450 [bonding]
	[
c008000002db8378] bond_start_xmit+0x40/0xc0 [bonding]
	[
c000000000c0046c] dev_hard_start_xmit+0x11c/0x280
	[
c000000000c00ca4] __dev_queue_xmit+0x564/0x730
	[
c000000000cf97e0] neigh_hh_output+0xd0/0x180
	[
c000000000cfa69c] ip_finish_output2+0x31c/0x5c0
	[
c000000000cfd244] __ip_queue_xmit+0x194/0x4f0
	[
c000000000d2a3c4] __tcp_transmit_skb+0x434/0x9b0
	[
c000000000d2d1e0] __tcp_retransmit_skb+0x1d0/0x6a0
	[
c000000000d2d984] tcp_retransmit_skb+0x34/0x130
	[
c000000000d310e8] tcp_retransmit_timer+0x388/0x6d0
	[
c000000000d315ec] tcp_write_timer_handler+0x1bc/0x330
	[
c000000000d317bc] tcp_write_timer+0x5c/0x200
	[
c000000000243270] call_timer_fn+0x50/0x1c0
	[
c000000000243704] __run_timers.part.0+0x324/0x460
	[
c000000000243894] run_timer_softirq+0x54/0xa0
	[
c000000000ea713c] __do_softirq+0x15c/0x3e0
	[
c000000000166258] __irq_exit_rcu+0x158/0x190
	[
c000000000166420] irq_exit+0x20/0x40
	[
c00000000002853c] timer_interrupt+0x14c/0x2b0
	[
c000000000009a00] decrementer_common_virt+0x210/0x220
	--- interrupt: 900 at plpar_hcall_norets_notrace+0x18/0x2c
The immediate cause of the crash is the access of tx_scrq in the following
snippet during a reset, where the tx_scrq can be either NULL or an address
that will soon be invalid:
	ibmvnic_xmit()
	{
		...
		tx_scrq = adapter->tx_scrq[queue_num];
		txq = netdev_get_tx_queue(netdev, queue_num);
		ind_bufp = &tx_scrq->ind_buf;
		if (test_bit(0, &adapter->resetting)) {
		...
	}
But beyond that, the call to ibmvnic_xmit() itself is not safe during a
reset and the reset path attempts to avoid this by stopping the queue in
ibmvnic_cleanup(). However just after the queue was stopped, an in-flight
ibmvnic_complete_tx() could have restarted the queue even as the reset is
progressing.
Since the queue was restarted we could get a call to ibmvnic_xmit() which
can then access the bad tx_scrq (or other fields).
We cannot however simply have ibmvnic_complete_tx() check the ->resetting
bit and skip starting the queue. This can race at the "back-end" of a good
reset which just restarted the queue but has not cleared the ->resetting
bit yet. If we skip restarting the queue due to ->resetting being true,
the queue would remain stopped indefinitely potentially leading to transmit
timeouts.
IOW ->resetting is too broad for this purpose. Instead use a new flag
that indicates whether or not the queues are active. Only the open/
reset paths control when the queues are active. ibmvnic_complete_tx()
and others wake up the queue only if the queue is marked active.
So we will have:
	A. reset/open thread in ibmvnic_cleanup() and __ibmvnic_open()
		->resetting = true
		->tx_queues_active = false
		disable tx queues
		...
		->tx_queues_active = true
		start tx queues
	B. Tx interrupt in ibmvnic_complete_tx():
		if (->tx_queues_active)
			netif_wake_subqueue();
To ensure that ->tx_queues_active and state of the queues are consistent,
we need a lock which:
	- must also be taken in the interrupt path (ibmvnic_complete_tx())
	- shared across the multiple queues in the adapter (so they don't
	  become serialized)
Use rcu_read_lock() and have the reset thread synchronize_rcu() after
updating the ->tx_queues_active state.
While here, consolidate a few boolean fields in ibmvnic_adapter for
better alignment.
Based on discussions with Brian King and Dany Madden.
Fixes: 7ed5b31f4a66 ("net/ibmvnic: prevent more than one thread from running in reset")
Reported-by: Vaishnavi Bhat <vaish123@in.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
 
                return rc;
        }
 
+       adapter->tx_queues_active = true;
+
+       /* Since queues were stopped until now, there shouldn't be any
+        * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
+        * don't need the synchronize_rcu()? Leaving it for consistency
+        * with setting ->tx_queues_active = false.
+        */
+       synchronize_rcu();
+
        netif_tx_start_all_queues(netdev);
 
        if (prev_state == VNIC_CLOSED) {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
        /* ensure that transmissions are stopped if called by do_reset */
+
+       adapter->tx_queues_active = false;
+
+       /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
+        * update so they don't restart a queue after we stop it below.
+        */
+       synchronize_rcu();
+
        if (test_bit(0, &adapter->resetting))
                netif_tx_disable(netdev);
        else
                tx_buff->skb = NULL;
                adapter->netdev->stats.tx_dropped++;
        }
+
        ind_bufp->index = 0;
+
        if (atomic_sub_return(entries, &tx_scrq->used) <=
            (adapter->req_tx_entries_per_subcrq / 2) &&
-           __netif_subqueue_stopped(adapter->netdev, queue_num) &&
-           !test_bit(0, &adapter->resetting)) {
-               netif_wake_subqueue(adapter->netdev, queue_num);
-               netdev_dbg(adapter->netdev, "Started queue %d\n",
-                          queue_num);
+           __netif_subqueue_stopped(adapter->netdev, queue_num)) {
+               rcu_read_lock();
+
+               if (adapter->tx_queues_active) {
+                       netif_wake_subqueue(adapter->netdev, queue_num);
+                       netdev_dbg(adapter->netdev, "Started queue %d\n",
+                                  queue_num);
+               }
+
+               rcu_read_unlock();
        }
 }
 
        int index = 0;
        u8 proto = 0;
 
-       tx_scrq = adapter->tx_scrq[queue_num];
-       txq = netdev_get_tx_queue(netdev, queue_num);
-       ind_bufp = &tx_scrq->ind_buf;
-
-       if (test_bit(0, &adapter->resetting)) {
+       /* If a reset is in progress, drop the packet since
+        * the scrqs may get torn down. Otherwise use the
+        * rcu to ensure reset waits for us to complete.
+        */
+       rcu_read_lock();
+       if (!adapter->tx_queues_active) {
                dev_kfree_skb_any(skb);
 
                tx_send_failed++;
                goto out;
        }
 
+       tx_scrq = adapter->tx_scrq[queue_num];
+       txq = netdev_get_tx_queue(netdev, queue_num);
+       ind_bufp = &tx_scrq->ind_buf;
+
        if (ibmvnic_xmit_workarounds(skb, netdev)) {
                tx_dropped++;
                tx_send_failed++;
                ibmvnic_tx_scrq_flush(adapter, tx_scrq);
                goto out;
        }
+
        if (skb_is_gso(skb))
                tx_pool = &adapter->tso_pool[queue_num];
        else
                netif_carrier_off(netdev);
        }
 out:
+       rcu_read_unlock();
        netdev->stats.tx_dropped += tx_dropped;
        netdev->stats.tx_bytes += tx_bytes;
        netdev->stats.tx_packets += tx_packets;
                    (adapter->req_tx_entries_per_subcrq / 2) &&
                    __netif_subqueue_stopped(adapter->netdev,
                                             scrq->pool_index)) {
-                       netif_wake_subqueue(adapter->netdev, scrq->pool_index);
-                       netdev_dbg(adapter->netdev, "Started queue %d\n",
-                                  scrq->pool_index);
+                       rcu_read_lock();
+                       if (adapter->tx_queues_active) {
+                               netif_wake_subqueue(adapter->netdev,
+                                                   scrq->pool_index);
+                               netdev_dbg(adapter->netdev,
+                                          "Started queue %d\n",
+                                          scrq->pool_index);
+                       }
+                       rcu_read_unlock();
                }
        }
 
 
        struct work_struct ibmvnic_reset;
        struct delayed_work ibmvnic_delayed_reset;
        unsigned long resetting;
-       bool napi_enabled, from_passive_init;
-       bool login_pending;
        /* last device reset time */
        unsigned long last_reset_time;
 
+       bool napi_enabled;
+       bool from_passive_init;
+       bool login_pending;
+       /* protected by rcu */
+       bool tx_queues_active;
        bool failover_pending;
        bool force_reset_recovery;