extern unsigned long long nr_context_switches(void);
 
 #ifndef CONFIG_GENERIC_HARDIRQS
-#define kstat_irqs_this_cpu(irq) \
-       (this_cpu_read(kstat.irqs[irq])
 
 struct irq_desc;
 
 static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
                                             struct irq_desc *desc)
 {
-       kstat_this_cpu.irqs[irq]++;
-       kstat_this_cpu.irqs_sum++;
+       __this_cpu_inc(kstat.irqs[irq]);        /* bump kstat.irqs[@irq]; @desc is not used in this variant */
+       __this_cpu_inc(kstat.irqs_sum);         /* bump the kstat.irqs_sum total as well */
 }
 
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 #else
 #include <linux/irq.h>
 extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
-#define kstat_irqs_this_cpu(DESC) \
-       ((DESC)->kstat_irqs[smp_processor_id()])
-#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\
-       ((DESC)->kstat_irqs[smp_processor_id()]++);\
-       kstat_this_cpu.irqs_sum++; } while (0)
+
+#define kstat_incr_irqs_this_cpu(irqno, DESC)          \
+do {   /* irqno is not referenced by this variant */   \
+       __this_cpu_inc(*(DESC)->kstat_irqs);    /* counter reached through DESC's per-cpu kstat_irqs pointer */ \
+       __this_cpu_inc(kstat.irqs_sum);         /* kstat.irqs_sum total */ \
+} while (0)
 
 #endif
 
 static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
 {
-       kstat_this_cpu.softirqs[irq]++;
+       __this_cpu_inc(kstat.softirqs[irq]);    /* bump kstat.softirqs[@irq] via the this_cpu accessor */
 }
 
 static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
 
 
 static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
 {
+       int cpu;        /* cursor for the for_each_possible_cpu() walk below */
+
         desc->irq_data.irq = irq;
         desc->irq_data.chip = &no_irq_chip;
         desc->irq_data.chip_data = NULL;
         desc->irq_count = 0;
         desc->irqs_unhandled = 0;
         desc->name = NULL;
-       memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+       for_each_possible_cpu(cpu)      /* kstat_irqs is reached via per_cpu_ptr(), so clear it one CPU at a time */
+               *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;        /* reset this CPU's counter for the descriptor */
         desc_smp_init(desc, node);
 }
 
        if (!desc)
                return NULL;
        /* allocate based on nr_cpu_ids */
-       desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
-                                        gfp, node);
+       desc->kstat_irqs = alloc_percpu(unsigned int);
        if (!desc->kstat_irqs)
                goto err_desc;
 
        return desc;
 
 err_kstat:
-       kfree(desc->kstat_irqs);
+       free_percpu(desc->kstat_irqs);
 err_desc:
        kfree(desc);
        return NULL;
        mutex_unlock(&sparse_irq_lock);
 
        free_masks(desc);
-       kfree(desc->kstat_irqs);
+       free_percpu(desc->kstat_irqs);
        kfree(desc);
 }
 
        }
 };
 
-static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
 int __init early_irq_init(void)
 {
        int count, i, node = first_online_node;
        for (i = 0; i < count; i++) {
                desc[i].irq_data.irq = i;
                desc[i].irq_data.chip = &no_irq_chip;
-               desc[i].kstat_irqs = kstat_irqs_all[i];
+               /* TODO : do this allocation on-demand ... */
+               desc[i].kstat_irqs = alloc_percpu(unsigned int);
                alloc_masks(desc + i, GFP_KERNEL, node);
                desc_smp_init(desc + i, node);
                lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 
 static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
 {
+#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
+       struct irq_desc *desc;
+       unsigned int i;
+
+       for (i = 0; i < cnt; i++) {
+               desc = irq_to_desc(start + i);
+               if (desc && !desc->kstat_irqs) {
+                       unsigned int __percpu *stats = alloc_percpu(unsigned int);
+
+                       if (!stats)
+                               return -1;
+                       if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
+                               free_percpu(stats);
+               }
+       }
+#endif
        return start;
 }
 #endif /* !CONFIG_SPARSE_IRQ */
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
         struct irq_desc *desc = irq_to_desc(irq);
-       return desc ? desc->kstat_irqs[cpu] : 0;
+
+       return desc && desc->kstat_irqs ?       /* 0 when there is no descriptor or no counter storage */
+                       *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;        /* otherwise the value held in @cpu's slot */
 }
 
 #ifdef CONFIG_GENERIC_HARDIRQS
        int cpu;
        int sum = 0;
 
-       if (!desc)
+       if (!desc || !desc->kstat_irqs)
                return 0;
        for_each_possible_cpu(cpu)
-               sum += desc->kstat_irqs[cpu];
+               sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
        return sum;
 }
 #endif /* CONFIG_GENERIC_HARDIRQS */