Update i40e and i40evf to use dma_rmb.  This should improve performance by
decreasing the barrier overhead on strongly ordered architectures.
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
                 * any other fields out of the rx_desc until we know the
                 * DD bit is set.
                 */
-               rmb();
+               dma_rmb();
                if (i40e_rx_is_programming_status(qword)) {
                        i40e_clean_programming_status(rx_ring, rx_desc);
                        I40E_RX_INCREMENT(rx_ring, i);
                 * any other fields out of the rx_desc until we know the
                 * DD bit is set.
                 */
-               rmb();
+               dma_rmb();
 
                if (i40e_rx_is_programming_status(qword)) {
                        i40e_clean_programming_status(rx_ring, rx_desc);
 
                 * any other fields out of the rx_desc until we know the
                 * DD bit is set.
                 */
-               rmb();
+               dma_rmb();
                rx_bi = &rx_ring->rx_bi[i];
                skb = rx_bi->skb;
                if (likely(!skb)) {
                 * any other fields out of the rx_desc until we know the
                 * DD bit is set.
                 */
-               rmb();
+               dma_rmb();
 
                rx_bi = &rx_ring->rx_bi[i];
                skb = rx_bi->skb;