lguest: optimize by coding restore_flags and irq_enable in assembler.

author Rusty Russell <rusty@rustcorp.com.au>

Sat, 13 Jun 2009 04:27:03 +0000 (22:27 -0600)

committer Rusty Russell <rusty@rustcorp.com.au>

Fri, 12 Jun 2009 12:57:03 +0000 (22:27 +0930)
author Rusty Russell <rusty@rustcorp.com.au>
Sat, 13 Jun 2009 04:27:03 +0000 (22:27 -0600)
committer Rusty Russell <rusty@rustcorp.com.au>
Fri, 12 Jun 2009 12:57:03 +0000 (22:27 +0930)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c

index 1a830cbd70153b8662eddf16a87a5336c6cb0742..dfdbf640389536489f5ac05b3258361d133d76d8 100644 (file)
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -126,6 +126,7 @@ void foo(void)
  #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
         BLANK();
         OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+       OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
         OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
  
         BLANK();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c

index 37b8c1d3e022b7ec0a643d3d6a4cc4545d466e4d..514f4d0d2bfa04ebbc6a9167e9f900402761bbe4 100644 (file)
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -179,7 +179,7 @@ static void lguest_end_context_switch(struct task_struct *next)
         paravirt_end_context_switch(next);
  }
  
-/*G:033
+/*G:032
   * After that diversion we return to our first native-instruction
   * replacements: four functions for interrupt control.
   *
@@ -199,41 +199,28 @@ static unsigned long save_fl(void)
  {
         return lguest_data.irq_enabled;
  }
-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
-
-/* restore_flags() just sets the flags back to the value given. */
-static void restore_fl(unsigned long flags)
-{
-       lguest_data.irq_enabled = flags;
-       mb();
-       /* Null hcall forces interrupt delivery now, if irq_pending is
-        * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags
-        * enables interrupts. */
-       if (flags & lguest_data.irq_pending)
-               kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
-}
-PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
  
  /* Interrupts go off... */
  static void irq_disable(void)
  {
         lguest_data.irq_enabled = 0;
  }
-PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
  
-/* Interrupts go on... */
-static void irq_enable(void)
-{
-       lguest_data.irq_enabled = X86_EFLAGS_IF;
-       mb();
-       /* Null hcall forces interrupt delivery now. */
-       if (lguest_data.irq_pending)
-               kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
+/* Let's pause a moment.  Remember how I said these are called so often?
+ * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
+ * break some rules.  In particular, these functions are assumed to save their
+ * own registers if they need to: normal C functions assume they can trash the
+ * eax register.  To use normal C functions, we use
+ * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
+ * C function, then restores it. */
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
+PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
+/*:*/
  
-}
-PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
+/* These are in i386_head.S */
+extern void lg_irq_enable(void);
+extern void lg_restore_fl(unsigned long flags);
  
-/*:*/
  /*M:003 Note that we don't check for outstanding interrupts when we re-enable
   * them (or when we unmask an interrupt).  This seems to work for the moment,
   * since interrupts are rare and we'll just get the interrupt on the next timer
@@ -1041,9 +1028,9 @@ __init void lguest_init(void)
         /* interrupt-related operations */
         pv_irq_ops.init_IRQ = lguest_init_IRQ;
         pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
-       pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
+       pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
         pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
-       pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
+       pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
         pv_irq_ops.safe_halt = lguest_safe_halt;
  
         /* init-time operations */
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S

index 3e0c5545d59caaff65bdc3d9146213899342fce9..a9c8cfe61cd4d48497a648538ce5b1c7d04e7cde 100644 (file)
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -47,7 +47,63 @@ ENTRY(lguest_entry)
  
  LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
  LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-/*:*/
+
+/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
+ * matter for save_fl and irq_disable later).  If we write our routines
+ * carefully in assembler, we can avoid clobbering any registers and avoid
+ * jumping through the wrapper functions.
+ *
+ * I skipped over our first piece of assembler, but this one is worth studying
+ * in a bit more detail so I'll describe it in easy stages.  First, the routine
+ * to enable interrupts: */
+ENTRY(lg_irq_enable)
+       /* The reverse of irq_disable, this sets lguest_data.irq_enabled to
+        * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
+       movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
+       /* But now we need to check if the Host wants to know: there might have
+        * been interrupts waiting to be delivered, in which case it will have
+        * set lguest_data.irq_pending to X86_EFLAGS_IF.  We test it against an
+        * all-ones mask: if any bit is set we jump to send_interrupts. */
+       testl $0xFFFFFFFF, lguest_data+LGUEST_DATA_irq_pending
+       jnz send_interrupts
+       /* One cool thing about x86 is that you can do many things without using
+        * a register.  In this case, the normal path hasn't needed to save or
+        * restore any registers at all! */
+       ret
+send_interrupts:
+       /* OK, now we need a register: eax is used for the hypercall number,
+        * which is LHCALL_SEND_INTERRUPTS.
+        *
+        * We used not to bother with this pending detection at all, which was
+        * much simpler.  Sooner or later the Host would realize it had to
+        * send us an interrupt.  But that turns out to make performance 7
+        * times worse on a simple tcp benchmark.  So now we do this the hard
+        * way. */
+       pushl %eax
+       movl $LHCALL_SEND_INTERRUPTS, %eax
+       /* This is a vmcall instruction (same thing that KVM uses).  Older
+        * assembler versions might not know the "vmcall" instruction, so we
+        * create one manually here. */
+       .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+       popl %eax
+       ret
+
+/* Finally, the "popf" or "restore flags" routine.  The %eax register holds the
+ * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
+ * enabling interrupts again, if it's 0 we're leaving them off. */
+ENTRY(lg_restore_fl)
+       /* This is just "lguest_data.irq_enabled = flags;" (flags are in %eax) */
+       movl %eax, lguest_data+LGUEST_DATA_irq_enabled
+       /* Now, if the %eax value has enabled interrupts and
+        * lguest_data.irq_pending is set, we want to tell the Host so it can
+        * deliver any outstanding interrupts.  Fortunately, both values will
+        * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" instruction
+        * ANDs them together for us without modifying %eax.  If both are set,
+        * we jump to send_interrupts (shared with lg_irq_enable above). */
+       testl lguest_data+LGUEST_DATA_irq_pending, %eax
+       jnz send_interrupts
+       /* Again, the normal path has used no extra registers.  Clever, huh? */
+       ret
  
  /* These demark the EIP range where host should never deliver interrupts. */
  .global lguest_noirq_start
author	Rusty Russell <rusty@rustcorp.com.au>
	Sat, 13 Jun 2009 04:27:03 +0000 (22:27 -0600)
committer	Rusty Russell <rusty@rustcorp.com.au>
	Fri, 12 Jun 2009 12:57:03 +0000 (22:27 +0930)
arch/x86/kernel/asm-offsets_32.c		patch \| blob \| history
arch/x86/lguest/boot.c		patch \| blob \| history
arch/x86/lguest/i386_head.S		patch \| blob \| history