#include "bpf_skel/off_cpu.skel.h"
 
 #define MAX_STACKS  32
+#define MAX_PROC  4096
 /* we don't need actual timestamp, just want to put the samples at last */
 #define OFF_CPU_TIMESTAMP  (~0ull << 32)
 
 
                        ntasks++;
                }
+
+               if (ntasks < MAX_PROC)
+                       ntasks = MAX_PROC;
+
                bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
        } else if (target__has_task(target)) {
                ntasks = perf_thread_map__nr(evlist->core.threads);
                bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+       } else if (target__none(target)) {
+               bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
        }
 
        if (evlist__first(evlist)->cgrp) {
 
 #define TASK_INTERRUPTIBLE     0x0001
 #define TASK_UNINTERRUPTIBLE   0x0002
 
+/* clone flag: child is a new thread in the parent's thread group,
+ * not a new process
+ */
+#define CLONE_THREAD  0x10000
+
 #define MAX_STACKS   32
 #define MAX_ENTRIES  102400
 
        return 0;
 }
 
+/*
+ * Fired for every new task (tp_btf/task_newtask).  If the parent process
+ * is already in task_filter, add the child process as well so that
+ * off-cpu samples from forked children keep being collected.
+ */
+SEC("tp_btf/task_newtask")
+int on_newtask(u64 *ctx)
+{
+       struct task_struct *task;
+       u64 clone_flags;
+       u32 pid;
+       u8 val = 1;
+
+       /* uses_tgid is presumably set when task_filter is keyed by tgid
+        * (set elsewhere in this file) — without it the lookup below
+        * would compare against the wrong key space.
+        */
+       if (!uses_tgid)
+               return 0;
+
+       /* current at this tracepoint is the parent doing the clone/fork */
+       task = (struct task_struct *)bpf_get_current_task();
+
+       pid = BPF_CORE_READ(task, tgid);
+       if (!bpf_map_lookup_elem(&task_filter, &pid))
+               return 0;       /* parent is not being tracked */
+
+       /* tracepoint args: ctx[0] = new task, ctx[1] = clone flags */
+       task = (struct task_struct *)ctx[0];
+       clone_flags = ctx[1];
+
+       pid = task->tgid;
+       /* a new thread shares the parent's tgid, which is already in the
+        * map — only register genuinely new processes
+        */
+       if (!(clone_flags & CLONE_THREAD))
+               bpf_map_update_elem(&task_filter, &pid, &val, BPF_NOEXIST);
+
+       return 0;
+}
+
 SEC("tp_btf/sched_switch")
 int on_switch(u64 *ctx)
 {