www.infradead.org Git - users/hch/blktests.git/commitdiff
block/008: check CPU offline failure due to many IRQs
author: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Fri, 28 Jan 2022 09:45:12 +0000 (18:45 +0900)
committer: Omar Sandoval <osandov@fb.com>
Thu, 17 Feb 2022 23:46:53 +0000 (15:46 -0800)
When a system has more IRQs than a single CPU can handle, the test case
block/008 fails with a kernel message such as:

   "CPU 31 has 111 vectors, 90 available. Cannot disable CPU"

The cause of the failure is that the test case offlines too many CPUs and
the remaining online CPUs cannot hold all of the required IRQ vectors. To
avoid this failure, check the error message of the CPU offline operation.
If the CPU offline failure is caused by an IRQ vector resource shortage, do
not treat it as a test failure. Also record the actual number of CPUs that
can be offlined without such a failure and use this number as the limit for
the test.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
tests/block/008

index 7445f8f33dae6cfd02e50dde16365fc639a3f300..75aae6505a6136657a6804ce82dd6be3ad367edb 100755 (executable)
@@ -60,17 +60,30 @@ test_device() {
 
                if (( offlining )); then
                        idx=$((RANDOM % ${#online_cpus[@]}))
-                       _offline_cpu "${online_cpus[$idx]}"
-                       offline_cpus+=("${online_cpus[$idx]}")
-                       unset "online_cpus[$idx]"
-                       online_cpus=("${online_cpus[@]}")
-               else
+                       if err=$(_offline_cpu "${online_cpus[$idx]}" 2>&1); then
+                               offline_cpus+=("${online_cpus[$idx]}")
+                               unset "online_cpus[$idx]"
+                               online_cpus=("${online_cpus[@]}")
+                       elif [[ $err =~ "No space left on device" ]]; then
+                               # ENOSPC means CPU offline failure due to IRQ
+                               # vector shortage. Keep current number of
+                               # offline CPUs as maximum CPUs to offline.
+                               max_offline=${#offline_cpus[@]}
+                               offlining=0
+                       else
+                               echo "Failed to offline CPU: $err"
+                               break
+                       fi
+               fi
+
+               if (( !offlining )); then
                        idx=$((RANDOM % ${#offline_cpus[@]}))
                        _online_cpu "${offline_cpus[$idx]}"
                        online_cpus+=("${offline_cpus[$idx]}")
                        unset "offline_cpus[$idx]"
                        offline_cpus=("${offline_cpus[@]}")
                fi
+
                end_time=$(date +%s)
                if (( end_time - start_time > timeout + 15 )); then
                        echo "fio did not finish after $timeout seconds!"