* Ensure irq/preemption can't change debugctl in between.
         * Note also that both TIF_BLOCKSTEP and debugctl should
         * be changed atomically wrt preemption.
-        * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
-        * wrong if task != current, SIGKILL can wakeup the stopped
-        * tracee and set/clear can play with the running task, this
-        * can confuse the next __switch_to_xtra().
+        *
+        * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+        * task is current or it can't be running, otherwise we can race
+        * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+        * PTRACE_KILL is not safe.
         */
        local_irq_disable();
        debugctl = get_debugctlmsr();
 
        spin_unlock(&child->sighand->siglock);
 }
 
+/* Ensure that nothing can wake it up, even SIGKILL */
+static bool ptrace_freeze_traced(struct task_struct *task)
+{
+       bool ret = false;
+
+       /* Lockless, nobody but us can set this flag */
+       if (task->jobctl & JOBCTL_LISTENING)
+               return ret;
+
+       spin_lock_irq(&task->sighand->siglock);
+       if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+               task->state = __TASK_TRACED;
+               ret = true;
+       }
+       spin_unlock_irq(&task->sighand->siglock);
+
+       return ret;
+}
+
+static void ptrace_unfreeze_traced(struct task_struct *task)
+{
+       if (task->state != __TASK_TRACED)
+               return;
+
+       WARN_ON(!task->ptrace || task->parent != current);
+
+       spin_lock_irq(&task->sighand->siglock);
+       if (__fatal_signal_pending(task))
+               wake_up_state(task, __TASK_TRACED);
+       else
+               task->state = TASK_TRACED;
+       spin_unlock_irq(&task->sighand->siglock);
+}
+
 /**
  * ptrace_check_attach - check whether ptracee is ready for ptrace operation
  * @child: ptracee to check for
         * be changed by us so it's not changing right after this.
         */
        read_lock(&tasklist_lock);
-       if ((child->ptrace & PT_PTRACED) && child->parent == current) {
+       if (child->ptrace && child->parent == current) {
+               WARN_ON(child->state == __TASK_TRACED);
                /*
                 * child->sighand can't be NULL, release_task()
                 * does ptrace_unlink() before __exit_signal().
                 */
-               spin_lock_irq(&child->sighand->siglock);
-               WARN_ON_ONCE(task_is_stopped(child));
-               if (ignore_state || (task_is_traced(child) &&
-                                    !(child->jobctl & JOBCTL_LISTENING)))
+               if (ignore_state || ptrace_freeze_traced(child))
                        ret = 0;
-               spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
 
-       if (!ret && !ignore_state)
-               ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
+       if (!ret && !ignore_state) {
+               if (!wait_task_inactive(child, __TASK_TRACED)) {
+                       /*
+                        * This can only happen if may_ptrace_stop() fails and
+                        * ptrace_stop() changes ->state back to TASK_RUNNING,
+                        * so we should not worry about leaking __TASK_TRACED.
+                        */
+                       WARN_ON(child->state == __TASK_TRACED);
+                       ret = -ESRCH;
+               }
+       }
 
-       /* All systems go.. */
        return ret;
 }
 
                goto out_put_task_struct;
 
        ret = arch_ptrace(child, request, addr, data);
+       if (ret || request != PTRACE_DETACH)
+               ptrace_unfreeze_traced(child);
 
  out_put_task_struct:
        put_task_struct(child);
 
        ret = ptrace_check_attach(child, request == PTRACE_KILL ||
                                  request == PTRACE_INTERRUPT);
-       if (!ret)
+       if (!ret) {
                ret = compat_arch_ptrace(child, request, addr, data);
+               if (ret || request != PTRACE_DETACH)
+                       ptrace_unfreeze_traced(child);
+       }
 
  out_put_task_struct:
        put_task_struct(child);
 
         * If SIGKILL was already sent before the caller unlocked
         * ->siglock we must see ->core_state != NULL. Otherwise it
         * is safe to enter schedule().
+        *
+        * This is almost outdated, a task with the pending SIGKILL can't
+        * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
+        * after SIGKILL was already dequeued.
         */
        if (unlikely(current->mm->core_state) &&
            unlikely(current->mm == current->parent->mm))
                if (gstop_done)
                        do_notify_parent_cldstop(current, false, why);
 
+               /* tasklist protects us from ptrace_freeze_traced() */
                __set_current_state(TASK_RUNNING);
                if (clear_code)
                        current->exit_code = 0;