#include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
 
+#define BUSY_MAX_EI    20u /* ms */
+
 /*
  * Lock protecting IPS related data structures
  */
        intel_uncore_write_fw(uncore, reg, val);
 }
 
+static void rps_timer(struct timer_list *t)
+{
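+       /*
+        * Software evaluation of GPU busyness, used in place of the
+        * hardware up/down interrupts on platforms where every engine
+        * provides busy-time stats.
+        */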
+       struct intel_rps *rps = from_timer(rps, t, timer);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       s64 max_busy[3] = {};
+       ktime_t dt, last;
+
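+       /*
+        * Sample the accumulated busy time of each engine and record the
+        * three largest deltas since the previous evaluation, kept in
+        * descending order in max_busy[].
+        */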
+       for_each_engine(engine, rps_to_gt(rps), id) {
+               s64 busy;
+               int i;
+
+               dt = intel_engine_get_busy_time(engine);
+               last = engine->stats.rps;
+               engine->stats.rps = dt;
+
+               busy = ktime_to_ns(ktime_sub(dt, last));
+               for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+                       if (busy > max_busy[i])
+                               swap(busy, max_busy[i]);
+               }
+       }
+
+       dt = ktime_get();
+       last = rps->pm_timestamp;
+       rps->pm_timestamp = dt;
+
+       if (intel_rps_is_active(rps)) {
+               s64 busy;
+               int i;
+
+               dt = ktime_sub(dt, last);
+
+               /*
+                * Our goal is to evaluate each engine independently, so we run
+                * at the lowest clocks required to sustain the heaviest
+                * workload. However, a task may be split into sequential
+                * dependent operations across a set of engines, such that
+                * the independent contributions do not account for high load,
+                * but overall the task is GPU bound. For example, consider
+                * video decode on vcs followed by colour post-processing
+                * on vecs, followed by general post-processing on rcs.
+                * Since multiple engines being active does not necessarily
+                * imply a single continuous workload across all of them, we
+                * hedge our bets by only contributing a factor of the
+                * distributed load into our busyness calculation.
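+                * The contribution is halved for each additional engine:
+                * busy = max_busy[0] + max_busy[1]/2 + max_busy[2]/4.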
+                */
+               busy = max_busy[0];
+               for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+                       if (!max_busy[i])
+                               break;
+
+                       busy += div_u64(max_busy[i], 1 << i);
+               }
+               GT_TRACE(rps_to_gt(rps),
+                        "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+                        busy, (int)div64_u64(100 * busy, dt),
+                        max_busy[0], max_busy[1], max_busy[2],
+                        rps->pm_interval);
+
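+               /*
+                * Compare busyness against the thresholds without a
+                * division: "100 * busy > threshold * dt" is the scaled
+                * form of "busy / dt > threshold / 100".
+                */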
+               if (100 * busy > rps->power.up_threshold * dt &&
+                   rps->cur_freq < rps->max_freq_softlimit) {
+                       rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+                       rps->pm_interval = 1;
+                       schedule_work(&rps->work);
+               } else if (100 * busy < rps->power.down_threshold * dt &&
+                          rps->cur_freq > rps->min_freq_softlimit) {
+                       rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+                       rps->pm_interval = 1;
+                       schedule_work(&rps->work);
+               } else {
+                       rps->last_adj = 0;
+               }
+
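+               /*
+                * Rearm the evaluation timer: the interval doubles on each
+                * pass, saturating at BUSY_MAX_EI ms, and is reset to 1ms
+                * above whenever a threshold is crossed.
+                */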
+               mod_timer(&rps->timer,
+                         jiffies + msecs_to_jiffies(rps->pm_interval));
+               rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+       }
+}
+
+static void rps_start_timer(struct intel_rps *rps)
+{
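+       /*
+        * pm_timestamp was stashed as an offset by rps_stop_timer();
+        * convert it back into an absolute timestamp so that the time
+        * spent parked is excluded from the next evaluation interval.
+        */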
+       rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+       rps->pm_interval = 1;
+       mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+       del_timer_sync(&rps->timer);
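+       /*
+        * Stash the elapsed portion of the current evaluation interval as
+        * an offset, letting rps_start_timer() resume it on unpark.
+        */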
+       rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+       cancel_work_sync(&rps->work);
+}
+
 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 {
        u32 mask = 0;
        if (new_power == rps->power.mode)
                return;
 
+       threshold_up = 95;
+       threshold_down = 85;
+
        /* Note the units here are not exactly 1us, but 1280ns. */
        switch (new_power) {
        case LOW_POWER:
-               /* Upclock if more than 95% busy over 16ms */
                ei_up = 16000;
-               threshold_up = 95;
-
-               /* Downclock if less than 85% busy over 32ms */
                ei_down = 32000;
-               threshold_down = 85;
                break;
 
        case BETWEEN:
-               /* Upclock if more than 90% busy over 13ms */
                ei_up = 13000;
-               threshold_up = 90;
-
-               /* Downclock if less than 75% busy over 32ms */
                ei_down = 32000;
-               threshold_down = 75;
                break;
 
        case HIGH_POWER:
-               /* Upclock if more than 85% busy over 10ms */
                ei_up = 10000;
-               threshold_up = 85;
-
-               /* Downclock if less than 60% busy over 32ms */
                ei_down = 32000;
-               threshold_down = 60;
                break;
        }
 
 
        mutex_unlock(&rps->lock);
 
+       rps->pm_iir = 0;
        if (intel_rps_has_interrupts(rps))
                rps_enable_interrupts(rps);
+       if (intel_rps_uses_timer(rps))
+               rps_start_timer(rps);
 
        if (IS_GEN(rps_to_i915(rps), 5))
                gen5_rps_update(rps);
        if (!intel_rps_clear_active(rps))
                return;
 
+       if (intel_rps_uses_timer(rps))
+               rps_stop_timer(rps);
        if (intel_rps_has_interrupts(rps))
                rps_disable_interrupts(rps);
 
        return ips->gfx_power + state2;
 }
 
+static bool has_busy_stats(struct intel_rps *rps)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       for_each_engine(engine, rps_to_gt(rps), id) {
+               if (!intel_engine_supports_stats(engine))
+                       return false;
+       }
+
+       return true;
+}
+
 void intel_rps_enable(struct intel_rps *rps)
 {
        struct drm_i915_private *i915 = rps_to_i915(rps);
        GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
        GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
 
-       if (INTEL_GEN(i915) >= 6)
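+       /*
+        * Prefer our own evaluation timer over the hardware up/down
+        * interrupts whenever every engine exposes busy-time stats.
+        */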
+       if (has_busy_stats(rps))
+               intel_rps_set_timer(rps);
+       else if (INTEL_GEN(i915) >= 6)
                intel_rps_set_interrupts(rps);
        else
                /* Ironlake currently uses intel_ips.ko */ {}
 
        intel_rps_clear_enabled(rps);
        intel_rps_clear_interrupts(rps);
+       intel_rps_clear_timer(rps);
 
        if (INTEL_GEN(i915) >= 6)
                gen6_rps_disable(rps);
        mutex_init(&rps->power.mutex);
 
        INIT_WORK(&rps->work, rps_work);
+       timer_setup(&rps->timer, rps_timer, 0);
 
        atomic_set(&rps->num_waiters, 0);
 }