]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
ice: Fix ice module unload
authorJakub Buchocki <jakubx.buchocki@intel.com>
Mon, 12 Jun 2023 17:14:21 +0000 (10:14 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Jun 2023 14:02:17 +0000 (16:02 +0200)
[ Upstream commit 24b454bc354ab7b1aa918a4fe3d7696516f592d4 ]

Clearing the interrupt scheme before PFR reset,
during the removal routine, could cause the hardware
errors and possibly lead to system reboot, as the PF
reset can cause the interrupt to be generated.

Place the call for PFR reset inside ice_deinit_dev(),
wait until reset and all pending transactions are done,
then call ice_clear_interrupt_scheme().

This introduces a PFR reset to multiple error paths.

Additionally, remove the call for the reset from
ice_load() - it will be a part of ice_unload() now.

Error example:
[   75.229328] ice 0000:ca:00.1: Failed to read Tx Scheduler Tree - User Selection data from flash
[   77.571315] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[   77.571418] {1}[Hardware Error]: event severity: recoverable
[   77.571459] {1}[Hardware Error]:  Error 0, type: recoverable
[   77.571500] {1}[Hardware Error]:   section_type: PCIe error
[   77.571540] {1}[Hardware Error]:   port_type: 4, root port
[   77.571580] {1}[Hardware Error]:   version: 3.0
[   77.571615] {1}[Hardware Error]:   command: 0x0547, status: 0x4010
[   77.571661] {1}[Hardware Error]:   device_id: 0000:c9:02.0
[   77.571703] {1}[Hardware Error]:   slot: 25
[   77.571736] {1}[Hardware Error]:   secondary_bus: 0xca
[   77.571773] {1}[Hardware Error]:   vendor_id: 0x8086, device_id: 0x347a
[   77.571821] {1}[Hardware Error]:   class_code: 060400
[   77.571858] {1}[Hardware Error]:   bridge: secondary_status: 0x2800, control: 0x0013
[   77.572490] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020
[   77.572870] pcieport 0000:c9:02.0:    [21] ACSViol                (First)
[   77.573222] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID
[   77.573554] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010
[   77.691273] {2}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[   77.691738] {2}[Hardware Error]: event severity: recoverable
[   77.691971] {2}[Hardware Error]:  Error 0, type: recoverable
[   77.692192] {2}[Hardware Error]:   section_type: PCIe error
[   77.692403] {2}[Hardware Error]:   port_type: 4, root port
[   77.692616] {2}[Hardware Error]:   version: 3.0
[   77.692825] {2}[Hardware Error]:   command: 0x0547, status: 0x4010
[   77.693032] {2}[Hardware Error]:   device_id: 0000:c9:02.0
[   77.693238] {2}[Hardware Error]:   slot: 25
[   77.693440] {2}[Hardware Error]:   secondary_bus: 0xca
[   77.693641] {2}[Hardware Error]:   vendor_id: 0x8086, device_id: 0x347a
[   77.693853] {2}[Hardware Error]:   class_code: 060400
[   77.694054] {2}[Hardware Error]:   bridge: secondary_status: 0x0800, control: 0x0013
[   77.719115] pci 0000:ca:00.1: AER: can't recover (no error_detected callback)
[   77.719140] pcieport 0000:c9:02.0: AER: device recovery failed
[   77.719216] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020
[   77.719390] pcieport 0000:c9:02.0:    [21] ACSViol                (First)
[   77.719557] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID
[   77.719723] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010

Fixes: 5b246e533d01 ("ice: split probe into smaller functions")
Signed-off-by: Jakub Buchocki <jakubx.buchocki@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230612171421.21570-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/intel/ice/ice_main.c

index 0c949ed22a31334dbf7bf55415e635ab84adc42a..98e8ce743fb2e3d6ba08fe3e1b9bb27b8ab18291 100644 (file)
@@ -4794,9 +4794,13 @@ err_init_pf:
 static void ice_deinit_dev(struct ice_pf *pf)
 {
        ice_free_irq_msix_misc(pf);
-       ice_clear_interrupt_scheme(pf);
        ice_deinit_pf(pf);
        ice_deinit_hw(&pf->hw);
+
+       /* Service task is already stopped, so call reset directly. */
+       ice_reset(&pf->hw, ICE_RESET_PFR);
+       pci_wait_for_pending_transaction(pf->pdev);
+       ice_clear_interrupt_scheme(pf);
 }
 
 static void ice_init_features(struct ice_pf *pf)
@@ -5086,10 +5090,6 @@ int ice_load(struct ice_pf *pf)
        struct ice_vsi *vsi;
        int err;
 
-       err = ice_reset(&pf->hw, ICE_RESET_PFR);
-       if (err)
-               return err;
-
        err = ice_init_dev(pf);
        if (err)
                return err;
@@ -5346,12 +5346,6 @@ static void ice_remove(struct pci_dev *pdev)
        ice_setup_mc_magic_wake(pf);
        ice_set_wake(pf);
 
-       /* Issue a PFR as part of the prescribed driver unload flow.  Do not
-        * do it via ice_schedule_reset() since there is no need to rebuild
-        * and the service task is already stopped.
-        */
-       ice_reset(&pf->hw, ICE_RESET_PFR);
-       pci_wait_for_pending_transaction(pdev);
        pci_disable_device(pdev);
 }