return 0;
 }
 
+/*
+ * Append the previously-saved LBR callchain entries (the stitch_node
+ * list built by has_stitched_lbr()) onto @cursor, in list order.
+ *
+ * Returns 0 on success, or the first non-zero error reported by
+ * callchain_cursor_append().
+ */
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+                                            struct callchain_cursor *cursor)
+{
+       struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+       struct stitch_list *stitch_node;
+       int err = 0;
+
+       list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+               struct callchain_cursor_node *cursor_node = &stitch_node->cursor;
+
+               err = callchain_cursor_append(cursor, cursor_node->ip,
+                                             &cursor_node->ms,
+                                             cursor_node->branch,
+                                             &cursor_node->branch_flags,
+                                             cursor_node->nr_loop_iter,
+                                             cursor_node->iter_cycles,
+                                             cursor_node->branch_from,
+                                             cursor_node->srcline);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/*
+ * Get a stitch_list node for saving one LBR callchain cursor entry.
+ *
+ * A node is recycled from lbr_stitch->free_lists when one is
+ * available; otherwise a new one is heap-allocated.  The returned
+ * node is uninitialized: the caller owns it and is expected to fill
+ * ->cursor and link ->node into lbr_stitch->lists.
+ *
+ * Returns NULL when the free list is empty and malloc() fails.
+ */
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+       struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+       struct stitch_list *stitch_node;
+
+       if (!list_empty(&lbr_stitch->free_lists)) {
+               stitch_node = list_first_entry(&lbr_stitch->free_lists,
+                                              struct stitch_list, node);
+               list_del(&stitch_node->node);
+
+               return stitch_node;
+       }
+
+       /* sizeof(*ptr) rather than sizeof(type): stays correct if the
+        * pointee type is ever renamed or changed.
+        */
+       return malloc(sizeof(*stitch_node));
+}
+
+/*
+ * Decide whether the LBRs of the previous sample can be stitched in
+ * front of (or behind, for caller order) the current sample's LBRs.
+ *
+ * The two samples' branch stacks are snapshots of the same hardware
+ * LBR ring (physical indices via hw_idx, ring size max_lbr).  If the
+ * entries they share at the same physical registers are identical,
+ * the non-overlapping tail of the previous sample is copied (from
+ * lbr_stitch->prev_lbr_cursor) into lbr_stitch->lists for later
+ * stitching, and true is returned.  Returns false when nothing can
+ * be stitched (missing stacks, shorter previous stack, no identical
+ * entries, or allocation failure).
+ *
+ * @callee selects list order: head insertion for callee order,
+ * tail insertion for caller order.
+ */
+static bool has_stitched_lbr(struct thread *thread,
+                            struct perf_sample *cur,
+                            struct perf_sample *prev,
+                            unsigned int max_lbr,
+                            bool callee)
+{
+       struct branch_stack *cur_stack = cur->branch_stack;
+       struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+       struct branch_stack *prev_stack = prev->branch_stack;
+       struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+       struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+       int i, j, nr_identical_branches = 0;
+       struct stitch_list *stitch_node;
+       u64 cur_base, distance;
+
+       /* Either sample may lack a branch stack (e.g. first sample). */
+       if (!cur_stack || !prev_stack)
+               return false;
+
+       /* Find the physical index of the base-of-stack for current sample. */
+       cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+       /*
+        * Distance (in ring slots, wrapping at max_lbr) from the current
+        * sample's base-of-stack to the previous sample's newest entry.
+        */
+       distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+                                                    (max_lbr + prev_stack->hw_idx - cur_base);
+       /* Previous sample has shorter stack. Nothing can be stitched. */
+       if (distance + 1 > prev_stack->nr)
+               return false;
+
+       /*
+        * Check if there are identical LBRs between two samples.
+        * Identical LBRs must have same from, to and flags values. Also,
+        * they have to be saved in the same LBR registers (same physical
+        * index).
+        *
+        * Starts from the base-of-stack of current sample.
+        */
+       /* NOTE(review): i = distance narrows u64 -> int; distance < prev_stack->nr
+        * is guaranteed by the check above — presumably bounded by max_lbr, verify.
+        */
+       for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+               if ((prev_entries[i].from != cur_entries[j].from) ||
+                   (prev_entries[i].to != cur_entries[j].to) ||
+                   (prev_entries[i].flags.value != cur_entries[j].flags.value))
+                       break;
+               nr_identical_branches++;
+       }
+
+       /* No overlap at the base of the stacks: the ring was fully
+        * rewritten between samples, so stitching would be bogus.
+        */
+       if (!nr_identical_branches)
+               return false;
+
+       /*
+        * Save the LBRs between the base-of-stack of previous sample
+        * and the base-of-stack of current sample into lbr_stitch->lists.
+        * These LBRs will be stitched later.
+        */
+       for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+               /* Skip entries whose cursor node was not resolved. */
+               if (!lbr_stitch->prev_lbr_cursor[i].valid)
+                       continue;
+
+               stitch_node = get_stitch_node(thread);
+               if (!stitch_node)
+                       return false;
+
+               memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+                      sizeof(struct callchain_cursor_node));
+
+               /* Callee order: oldest entries go deepest (list head);
+                * caller order: the reverse (list tail).
+                */
+               if (callee)
+                       list_add(&stitch_node->node, &lbr_stitch->lists);
+               else
+                       list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+       }
+
+       return true;
+}
+
 static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
 {
        if (thread->lbr_stitch)
        if (!thread->lbr_stitch->prev_lbr_cursor)
                goto free_lbr_stitch;
 
+       INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+       INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
        return true;
 
 free_lbr_stitch:
                                        int max_stack,
                                        unsigned int max_lbr)
 {
+       bool callee = (callchain_param.order == ORDER_CALLEE);
        struct ip_callchain *chain = sample->callchain;
        int chain_nr = min(max_stack, (int)chain->nr), i;
        struct lbr_stitch *lbr_stitch;
+       bool stitched_lbr = false;
        u64 branch_from = 0;
        int err;
 
            (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
                lbr_stitch = thread->lbr_stitch;
 
+               stitched_lbr = has_stitched_lbr(thread, sample,
+                                               &lbr_stitch->prev_sample,
+                                               max_lbr, callee);
+
+               if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+                       list_replace_init(&lbr_stitch->lists,
+                                         &lbr_stitch->free_lists);
+               }
                memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
        }
 
-       if (callchain_param.order == ORDER_CALLEE) {
+       if (callee) {
                /* Add kernel ip */
                err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
                                                  parent, root_al, branch_from,
                if (err)
                        goto error;
 
+               if (stitched_lbr) {
+                       err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+                       if (err)
+                               goto error;
+               }
+
        } else {
+               if (stitched_lbr) {
+                       err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+                       if (err)
+                               goto error;
+               }
                err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
                                               root_al, &branch_from, false);
                if (err)