if (!mddev->persistent) {
                clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
                clear_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (!mddev->external)
+               if (!mddev->external) {
                        clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+                       list_for_each_entry(rdev, &mddev->disks, same_set) {
+                               if (rdev->badblocks.changed) {
+                                       md_ack_all_badblocks(&rdev->badblocks);
+                                       md_error(mddev, rdev);
+                               }
+                               clear_bit(Blocked, &rdev->flags);
+                               clear_bit(BlockedBadBlocks, &rdev->flags);
+                               wake_up(&rdev->blocked_wait);
+                       }
+               }
                wake_up(&mddev->sb_wait);
                return;
        }
                mddev->events --;
        }
 
-       list_for_each_entry(rdev, &mddev->disks, same_set)
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->badblocks.changed)
                        any_badblocks_changed++;
+               if (test_bit(Faulty, &rdev->flags))
+                       set_bit(FaultRecorded, &rdev->flags);
+       }
 
        sync_sbs(mddev, nospares);
        spin_unlock_irq(&mddev->write_lock);
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 
-       if (any_badblocks_changed)
-               list_for_each_entry(rdev, &mddev->disks, same_set)
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+                       clear_bit(Blocked, &rdev->flags);
+
+               if (any_badblocks_changed)
                        md_ack_all_badblocks(&rdev->badblocks);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
+               wake_up(&rdev->blocked_wait);
+       }
 }
 
 /* words written to sysfs files may, or may not, be \n terminated.
        char *sep = "";
        size_t len = 0;
 
-       if (test_bit(Faulty, &rdev->flags)) {
+       if (test_bit(Faulty, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len+= sprintf(page+len, "%sfaulty",sep);
                sep = ",";
        }
                len += sprintf(page+len, "%swrite_mostly",sep);
                sep = ",";
        }
-       if (test_bit(Blocked, &rdev->flags)) {
+       if (test_bit(Blocked, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len += sprintf(page+len, "%sblocked", sep);
                sep = ",";
        }
 state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
        /* can write
-        *  faulty  - simulates and error
+        *  faulty  - simulates an error
         *  remove  - disconnects the device
         *  writemostly - sets write_mostly
         *  -writemostly - clears write_mostly
-        *  blocked - sets the Blocked flag
-        *  -blocked - clears the Blocked flag
+        *  blocked - sets the Blocked flags
+        *  -blocked - clears the Blocked and possibly simulates an error
         *  insync - sets Insync providing device isn't active
         *  write_error - sets WriteErrorSeen
         *  -write_error - clears WriteErrorSeen
                set_bit(Blocked, &rdev->flags);
                err = 0;
        } else if (cmd_match(buf, "-blocked")) {
+               if (!test_bit(Faulty, &rdev->flags) &&
+                   test_bit(BlockedBadBlocks, &rdev->flags)) {
+                       /* metadata handler doesn't understand badblocks,
+                        * so we need to fail the device
+                        */
+                       md_error(rdev->mddev, rdev);
+               }
                clear_bit(Blocked, &rdev->flags);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
                wake_up(&rdev->blocked_wait);
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
 }
 static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len)
 {
-       return badblocks_store(&rdev->badblocks, page, len, 0);
+       int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+       /* Maybe that ack was all we needed */
+       if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+               wake_up(&rdev->blocked_wait);
+       return rv;
 }
 static struct rdev_sysfs_entry rdev_bad_blocks =
 __ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
        if (!rdev || test_bit(Faulty, &rdev->flags))
                return;
 
-       if (mddev->external)
-               set_bit(Blocked, &rdev->flags);
-/*
-       dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
-               mdname(mddev),
-               MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
-               __builtin_return_address(0),__builtin_return_address(1),
-               __builtin_return_address(2),__builtin_return_address(3));
-*/
-       if (!mddev->pers)
-               return;
-       if (!mddev->pers->error_handler)
+       if (!mddev->pers || !mddev->pers->error_handler)
                return;
        mddev->pers->error_handler(mddev,rdev);
        if (mddev->degraded)
                list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
-                           !test_bit(Faulty, &rdev->flags) &&
-                           !test_bit(Blocked, &rdev->flags))
+                           !test_bit(Faulty, &rdev->flags))
                                spares++;
                        if (rdev->raid_disk < 0
                            && !test_bit(Faulty, &rdev->flags)) {
 {
        sysfs_notify_dirent_safe(rdev->sysfs_state);
        wait_event_timeout(rdev->blocked_wait,
-                          !test_bit(Blocked, &rdev->flags),
+                          !test_bit(Blocked, &rdev->flags) &&
+                          !test_bit(BlockedBadBlocks, &rdev->flags),
                           msecs_to_jiffies(5000));
        rdev_dec_pending(rdev, mddev);
 }
        }
 
        bb->changed = 1;
+       if (!acknowledged)
+               bb->unacked_exist = 1;
        write_sequnlock_irq(&bb->lock);
 
        return rv;
                                p[i] = BB_MAKE(start, len, 1);
                        }
                }
+               bb->unacked_exist = 0;
        }
        write_sequnlock_irq(&bb->lock);
 }
                                (unsigned long long)s << bb->shift,
                                length << bb->shift);
        }
+       if (unack && len == 0)
+               bb->unacked_exist = 0;
 
        if (read_seqretry(&bb->lock, seq))
                goto retry;
 
 #define        In_sync         2               /* device is in_sync with rest of array */
 #define        WriteMostly     4               /* Avoid reading if at all possible */
 #define        AutoDetected    7               /* added by auto-detect */
-#define Blocked                8               /* An error occurred on an externally
-                                        * managed array, don't allow writes
+#define Blocked                8               /* An error occurred but has not yet
+                                        * been acknowledged by the metadata
+                                        * handler, so don't allow writes
                                         * until it is cleared */
 #define WriteErrorSeen 9               /* A write error has been seen on this
                                         * device
                                         */
+#define FaultRecorded  10              /* Intermediate state for clearing
+                                        * Blocked.  The Fault is/will-be
+                                        * recorded in the metadata, but that
+                                        * metadata hasn't been stored safely
+                                        * on disk yet.
+                                        */
+#define BlockedBadBlocks 11            /* A writer is blocked because they
+                                        * found an unacknowledged bad-block.
+                                        * This can safely be cleared at any
+                                        * time, and the writer will re-check.
+                                        * It may be set at any time, and at
+                                        * worst the writer will timeout and
+                                        * re-check.  So setting it as
+                                        * accurately as possible is good, but
+                                        * not absolutely critical.
+                                        */
        wait_queue_head_t blocked_wait;
 
        int desc_nr;                    /* descriptor index in the superblock */
 
        struct badblocks {
                int     count;          /* count of bad blocks */
+               int     unacked_exist;  /* there probably are unacknowledged
+                                        * bad blocks.  This is only cleared
+                                        * when a read discovers none
+                                        */
                int     shift;          /* shift from sectors to block size
                                         * a -ve shift means badblocks are
                                         * disabled.*/