struct branch_entry *entries = perf_sample__branch_entries(sample);
        uint64_t i;
 
-       printf("%s: nr:%" PRIu64 "\n",
-               !callstack ? "... branch stack" : "... branch callstack",
-               sample->branch_stack->nr);
+       if (!callstack) {
+               printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+       } else {
+               /* nr is incremented by 1 because expanding the branch
+                * stack generates nr + 1 callstack records. e.g.,
+                *         B()->C()
+                *         A()->B()
+                * the final callstack should be:
+                *         C()
+                *         B()
+                *         A()
+                */
+               printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+       }
 
        for (i = 0; i < sample->branch_stack->nr; i++) {
                struct branch_entry *e = &entries[i];
                                (unsigned)e->flags.reserved,
                                e->flags.type ? branch_type_name(e->flags.type) : "");
                } else {
-                       printf("..... %2"PRIu64": %016" PRIx64 "\n",
-                               i, i > 0 ? e->from : e->to);
+                       if (i == 0) {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n"
+                                      "..... %2"PRIu64": %016" PRIx64 "\n",
+                                               i, e->to, i+1, e->from);
+                       } else {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+                       }
                }
        }
 }