return ((u64)hi << 32) | lo;
 }
 
+static u64 panfrost_get_core_mask(struct panfrost_device *pfdev)
+{
+       u64 core_mask;
+
+       if (pfdev->features.l2_present == 1)
+               return U64_MAX;
+
+       /*
+        * Only support one core group now.
+        * ~(l2_present - 1) unsets all bits in l2_present except
+        * the bottom bit. (l2_present - 2) has all the bits in
+        * the first core group set. AND them together to generate
+        * a mask of cores in the first core group.
+        */
+       core_mask = ~(pfdev->features.l2_present - 1) &
+                    (pfdev->features.l2_present - 2);
+       dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
+                     hweight64(core_mask),
+                     hweight64(pfdev->features.shader_present));
+
+       return core_mask;
+}
+
 void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 {
        int ret;
        u32 val;
-       u64 core_mask = U64_MAX;
+       u64 core_mask;
 
        panfrost_gpu_init_quirks(pfdev);
+       core_mask = panfrost_get_core_mask(pfdev);
 
-       if (pfdev->features.l2_present != 1) {
-               /*
-                * Only support one core group now.
-                * ~(l2_present - 1) unsets all bits in l2_present except
-                * the bottom bit. (l2_present - 2) has all the bits in
-                * the first core group set. AND them together to generate
-                * a mask of cores in the first core group.
-                */
-               core_mask = ~(pfdev->features.l2_present - 1) &
-                            (pfdev->features.l2_present - 2);
-               dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
-                             hweight64(core_mask),
-                             hweight64(pfdev->features.shader_present));
-       }
        gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask);
        ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
                val, val == (pfdev->features.l2_present & core_mask),
 
 void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 {
-       gpu_write(pfdev, TILER_PWROFF_LO, 0);
-       gpu_write(pfdev, SHADER_PWROFF_LO, 0);
-       gpu_write(pfdev, L2_PWROFF_LO, 0);
+       u64 core_mask = panfrost_get_core_mask(pfdev);
+       int ret;
+       u32 val;
+
+       gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present & core_mask);
+       ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
+                                        val, !val, 1, 1000);
+       if (ret)
+               dev_err(pfdev->dev, "shader power transition timeout");
+
+       gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
+       ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
+                                        val, !val, 1, 1000);
+       if (ret)
+               dev_err(pfdev->dev, "tiler power transition timeout");
+
+       gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present & core_mask);
+       ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
+                                val, !val, 0, 1000);
+       if (ret)
+               dev_err(pfdev->dev, "l2 power transition timeout");
 }
 
 int panfrost_gpu_init(struct panfrost_device *pfdev)