Since there's a PID space limit of 30bits (see
futex.h:FUTEX_TID_MASK) and allocating that many tasks (assuming a
lower bound of 2 pages per task) would still take 8T of memory it
seems reasonable to say that unsigned int is sufficient for
rq->nr_running.
When we do get anywhere near that amount of tasks I suspect other
things would go funny, load-balancer load computations would really
need to be hoisted to 128bit etc.
So save a few bytes and convert rq->nr_running and friends to
unsigned int.
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-y3tvyszjdmbibade5bw8zl81@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
                        SPLIT_NS(spread0));
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
                        cfs_rq->nr_spread_over);
-       SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
+       SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
        SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 
                 * correctly treated as an imbalance.
                 */
                env.flags |= LBF_ALL_PINNED;
-               env.load_move   = imbalance;
-               env.src_cpu     = busiest->cpu;
-               env.src_rq      = busiest;
-               env.loop_max    = min_t(unsigned long, sysctl_sched_nr_migrate, busiest->nr_running);
+               env.load_move = imbalance;
+               env.src_cpu   = busiest->cpu;
+               env.src_rq    = busiest;
+               env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
                local_irq_save(flags);
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
        struct load_weight load;
-       unsigned long nr_running, h_nr_running;
+       unsigned int nr_running, h_nr_running;
 
        u64 exec_clock;
        u64 min_vruntime;
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
        struct rt_prio_array active;
-       unsigned long rt_nr_running;
+       unsigned int rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
        struct {
                int curr; /* highest queued rt task prio */
         * nr_running and cpu_load should be in the same cacheline because
         * remote CPUs use both these fields when doing load calculation.
         */
-       unsigned long nr_running;
+       unsigned int nr_running;
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
        unsigned long last_load_update_tick;