tcg: allocate TB structs before the corresponding translated code

author Emilio G. Cota <cota@braap.org>

Tue, 6 Jun 2017 23:12:25 +0000 (19:12 -0400)

committer Richard Henderson <rth@twiddle.net>

Mon, 19 Jun 2017 18:10:59 +0000 (11:10 -0700)
author Emilio G. Cota <cota@braap.org>
Tue, 6 Jun 2017 23:12:25 +0000 (19:12 -0400)
committer Richard Henderson <rth@twiddle.net>
Mon, 19 Jun 2017 18:10:59 +0000 (11:10 -0700)
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h

index c7f17f26e07c364ae1bef86c23dacfec602581a9..25c2afe753d783849ee60ad2d6ac1ea7bfc8de51 100644 (file)
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -31,8 +31,9 @@ typedef struct TBContext TBContext;
  
  struct TBContext {
  
-    TranslationBlock *tbs;
+    TranslationBlock **tbs;
      struct qht htable;
+    size_t tbs_size;
      int nb_tbs;
      /* any access to the tbs or the page table must use this lock */
      QemuMutex tb_lock;
diff --git a/tcg/tcg.c b/tcg/tcg.c

index 564292f54dddf5646c00a45e96ce15ab0987914e..35598296c51555357f663a77ac65cb8b74a9a3ae 100644 (file)
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -383,6 +383,26 @@ void tcg_context_init(TCGContext *s)
      }
  }
  
+/*
+ * Allocate a TB right before its corresponding translated code, making sure
+ * the TB and the code are on different cache lines.  NULL if out of room.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+    uintptr_t align = qemu_icache_linesize;
+    TranslationBlock *tb;
+    void *next;
+
+    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); /* TB start */
+    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); /* aligned TB end */
+
+    if (unlikely(next > s->code_gen_highwater)) {
+        return NULL; /* past code_gen_highwater; code_gen_ptr unchanged */
+    }
+    s->code_gen_ptr = next; /* next allocation starts after this TB */
+    return tb;
+}
+
  void tcg_prologue_init(TCGContext *s)
  {
      size_t prologue_size, total_size;
diff --git a/tcg/tcg.h b/tcg/tcg.h

index 5ec48d178764f324d3ad149c5b472085eff8caae..9e37722799a604a8b07e4430f2bc9d5dec49d9f2 100644 (file)
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -697,7 +697,6 @@ struct TCGContext {
         here, because there's too much arithmetic throughout that relies
         on addition and subtraction working on bytes.  Rely on the GCC
         extension that allows arithmetic on void*.  */
-    int code_gen_max_blocks;
      void *code_gen_prologue;
      void *code_gen_epilogue;
      void *code_gen_buffer;
@@ -756,6 +755,7 @@ static inline bool tcg_op_buf_full(void)
  /* tb_lock must be held for tcg_malloc_internal. */
  void *tcg_malloc_internal(TCGContext *s, int size);
  void tcg_pool_reset(TCGContext *s);
+TranslationBlock *tcg_tb_alloc(TCGContext *s);
  
  void tb_lock(void);
  void tb_unlock(void);
diff --git a/translate-all.c b/translate-all.c

index b3ee876526a814458ae6293f46c618df27e2ae41..bb094ad0dbc877d8800ae44dc518d3b740e2f34a 100644 (file)
--- a/translate-all.c
+++ b/translate-all.c
@@ -781,12 +781,13 @@ static inline void code_gen_alloc(size_t tb_size)
          exit(1);
      }
  
-    /* Estimate a good size for the number of TBs we can support.  We
-       still haven't deducted the prologue from the buffer size here,
-       but that's minimal and won't affect the estimate much.  */
-    tcg_ctx.code_gen_max_blocks
-        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+    /* size this conservatively -- realloc later if needed */
+    tcg_ctx.tb_ctx.tbs_size =
+        tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
+    if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) {
+        tcg_ctx.tb_ctx.tbs_size = 64 * 1024;
+    }
+    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size);
  
      qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
  }
@@ -828,13 +829,20 @@ bool tcg_enabled(void)
  static TranslationBlock *tb_alloc(target_ulong pc)
  {
      TranslationBlock *tb;
+    TBContext *ctx;
  
      assert_tb_locked();
  
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
+    tb = tcg_tb_alloc(&tcg_ctx);
+    if (unlikely(tb == NULL)) {
          return NULL;
      }
-    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+    ctx = &tcg_ctx.tb_ctx;
+    if (unlikely(ctx->nb_tbs == ctx->tbs_size)) {
+        ctx->tbs_size *= 2;
+        ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
+    }
+    ctx->tbs[ctx->nb_tbs++] = tb;
      tb->pc = pc;
      tb->cflags = 0;
      tb->invalid = false;
@@ -850,8 +858,10 @@ void tb_free(TranslationBlock *tb)
         Ignore the hard cases and just back up if this TB happens to
         be the last one generated.  */
      if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
-            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
-        tcg_ctx.code_gen_ptr = tb->tc_ptr;
+            tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+        size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize);
+
+        tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size;
          tcg_ctx.tb_ctx.nb_tbs--;
      }
  }
@@ -1666,7 +1676,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
      m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
      while (m_min <= m_max) {
          m = (m_min + m_max) >> 1;
-        tb = &tcg_ctx.tb_ctx.tbs[m];
+        tb = tcg_ctx.tb_ctx.tbs[m];
          v = (uintptr_t)tb->tc_ptr;
          if (v == tc_ptr) {
              return tb;
@@ -1676,7 +1686,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
              m_min = m + 1;
          }
      }
-    return &tcg_ctx.tb_ctx.tbs[m_max];
+    return tcg_ctx.tb_ctx.tbs[m_max];
  }
  
  #if !defined(CONFIG_USER_ONLY)
@@ -1874,7 +1884,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
      direct_jmp_count = 0;
      direct_jmp2_count = 0;
      for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
-        tb = &tcg_ctx.tb_ctx.tbs[i];
+        tb = tcg_ctx.tb_ctx.tbs[i];
          target_code_size += tb->size;
          if (tb->size > max_target_code_size) {
              max_target_code_size = tb->size;
@@ -1894,8 +1904,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
      cpu_fprintf(f, "gen code size       %td/%zd\n",
                  tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                  tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
-    cpu_fprintf(f, "TB count            %d/%d\n",
-            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+    cpu_fprintf(f, "TB count            %d\n", tcg_ctx.tb_ctx.nb_tbs);
      cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
              tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
                      tcg_ctx.tb_ctx.nb_tbs : 0,
author	Emilio G. Cota <cota@braap.org>
	Tue, 6 Jun 2017 23:12:25 +0000 (19:12 -0400)
committer	Richard Henderson <rth@twiddle.net>
	Mon, 19 Jun 2017 18:10:59 +0000 (11:10 -0700)
include/exec/tb-context.h		patch \| blob \| history
tcg/tcg.c		patch \| blob \| history
tcg/tcg.h		patch \| blob \| history
translate-all.c		patch \| blob \| history