return -err;
 }
 
-static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width)
+#define TEST_RUNNING -3 /* print_test_result() status: prints " Running (N active)" */
+
+static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width,
+                            int running)
 {
        if (has_subtests(t)) {
                int subw = width > 2 ? width - 2 : width;
                pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest));
 
        switch (result) {
+       case TEST_RUNNING:
+               color_fprintf(stderr, PERF_COLOR_YELLOW, " Running (%d active)\n", running);
+               break;
        case TEST_OK:
                pr_info(" Ok\n");
                break;
        return 0;
 }
 
-static int finish_test(struct child_test *child_test, int width)
+static int finish_test(struct child_test **child_tests, int running_test, int child_test_num,
+                      int width)
 {
+       struct child_test *child_test = child_tests[running_test];
        struct test_suite *t = child_test->test;
        int i = child_test->test_num;
        int subi = child_test->subtest;
        int err = child_test->process.err;
-       bool err_done = err <= 0;
+       bool err_done = false;
        struct strbuf err_output = STRBUF_INIT;
+       int last_running = -1;
        int ret;
 
        /*
         * Busy loop reading from the child's stdout/stderr that are set to be
         * non-blocking until EOF.
         */
-       if (!err_done)
+       if (err > 0)
                fcntl(err, F_SETFL, O_NONBLOCK);
        if (verbose > 1) {
                if (has_subtests(t))
                          .events = POLLIN | POLLERR | POLLHUP | POLLNVAL,
                        },
                };
-               char buf[512];
-               ssize_t len;
-
-               /* Poll to avoid excessive spinning, timeout set for 100ms. */
-               poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100);
-               if (!err_done && pfds[0].revents) {
-                       errno = 0;
-                       len = read(err, buf, sizeof(buf) - 1);
-
-                       if (len <= 0) {
-                               err_done = errno != EAGAIN;
-                       } else {
-                               buf[len] = '\0';
-                               if (verbose > 1)
-                                       fprintf(stdout, "%s", buf);
-                               else
+               if (perf_use_color_default) {
+                       int running = 0;
+
+                       for (int y = running_test; y < child_test_num; y++) {
+                               if (check_if_command_finished(&child_tests[y]->process) == 0)
+                                       running++;
+                       }
+                       if (running != last_running) {
+                               if (last_running != -1) {
+                                       /*
+                                        * Erase "Running (.. active)" line
+                                        * printed before poll/sleep.
+                                        */
+                                       fprintf(debug_file(), PERF_COLOR_DELETE_LINE);
+                               }
+                               print_test_result(t, i, subi, TEST_RUNNING, width, running);
+                               last_running = running;
+                       }
+               }
+
+               err_done = true;
+               if (err <= 0) {
+                       /* No child stderr to poll, sleep for 10ms for child to complete. */
+                       usleep(10 * 1000);
+               } else {
+                       /* Poll to avoid excessive spinning, timeout set for 100ms. */
+                       poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100);
+                       if (pfds[0].revents) {
+                               char buf[512];
+                               ssize_t len;
+
+                               len = read(err, buf, sizeof(buf) - 1);
+
+                               if (len > 0) {
+                                       err_done = false;
+                                       buf[len] = '\0';
                                        strbuf_addstr(&err_output, buf);
+                               }
                        }
                }
+               if (err_done)
+                       err_done = check_if_command_finished(&child_test->process);
+       }
+       if (perf_use_color_default && last_running != -1) {
+               /* Erase "Running (.. active)" line printed before poll/sleep. */
+               fprintf(debug_file(), PERF_COLOR_DELETE_LINE);
        }
        /* Clean up child process. */
        ret = finish_command(&child_test->process);
-       if (verbose == 1 && ret == TEST_FAIL) {
-               /* Add header for test that was skipped above. */
-               if (has_subtests(t))
-                       pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi));
-               else
-                       pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
+       if (verbose > 1 || (verbose == 1 && ret == TEST_FAIL))
                fprintf(stderr, "%s", err_output.buf);
-       }
+
        strbuf_release(&err_output);
-       print_test_result(t, i, subi, ret, width);
+       print_test_result(t, i, subi, ret, width, /*running=*/0);
        if (err > 0)
                close(err);
        return 0;
                pr_debug("--- start ---\n");
                err = test_function(test, subi)(test, subi);
                pr_debug("---- end ----\n");
-               print_test_result(test, i, subi, err, width);
+               print_test_result(test, i, subi, err, width, /*running=*/0);
                return 0;
        }
 
        err = start_command(&(*child)->process);
        if (err || !sequential)
                return  err;
-       return finish_test(*child, width);
+       return finish_test(child, /*running_test=*/0, /*child_test_num=*/1, width);
 }
 
 #define for_each_test(j, k, t)                                 \
        }
        for (i = 0; i < child_test_num; i++) {
                if (!sequential) {
-                       int ret  = finish_test(child_tests[i], width);
+                       int ret  = finish_test(child_tests, i, child_test_num, width);
 
                        if (ret)
                                return ret;