#define MAXIOC (8192)  /* default meant to avoid most soft lockups */
 
 static void ktcomplete(struct frame *, struct sk_buff *);
+static int count_targets(struct aoedev *d, int *untainted);
 
 static struct buf *nextbuf(struct aoedev *);
 
        spinlock_t lock;
 } iocq;
 
+static struct page *empty_page;
+
 static struct sk_buff *
 new_skb(ulong len)
 {
 
        t = f->t;
        f->buf = NULL;
+       f->lba = 0;
        f->bv = NULL;
        f->r_skb = NULL;
+       f->flags = 0;
        list_add(&f->head, &t->ffree);
 }
 
        struct frame *f;
        struct aoetgt *t, **tt;
        int totout = 0;
+       int use_tainted;
+       int has_untainted;
 
        if (d->targets[0] == NULL) {    /* shouldn't happen, but I'm paranoid */
                printk(KERN_ERR "aoe: NULL TARGETS!\n");
                return NULL;
        }
        tt = d->tgt;    /* last used target */
-       for (;;) {
+       for (use_tainted = 0, has_untainted = 0;;) {
                tt++;
                if (tt >= &d->targets[NTARGETS] || !*tt)
                        tt = d->targets;
                t = *tt;
-               totout += t->nout;
+               if (!t->taint) {
+                       has_untainted = 1;
+                       totout += t->nout;
+               }
                if (t->nout < t->maxout
-               && t != d->htgt
+               && (use_tainted || !t->taint)
                && t->ifp->nd) {
                        f = newtframe(d, t);
                        if (f) {
                                return f;
                        }
                }
-               if (tt == d->tgt)       /* we've looped and found nada */
-                       break;
+               if (tt == d->tgt) {     /* we've looped and found nada */
+                       if (!use_tainted && !has_untainted)
+                               use_tainted = 1;
+                       else
+                               break;
+               }
        }
        if (totout == 0) {
                d->kicked++;
        list_add_tail(&f->head, &d->factive[n]);
 }
 
+/*
+ * Initialize the AoE and ATA headers in f->skb for a transfer of
+ * f->bcnt bytes at f->lba, assign the frame a new tag, hash it, and
+ * bump the target's nout counter.  When f->buf is set, f->lba is taken
+ * from the buf first; when that buf's bio is a WRITE the payload is
+ * attached to the skb and the write bit is set in the ATA command.
+ */
+static void
+ata_rw_frameinit(struct frame *f)
+{
+       struct sk_buff *skb = f->skb;
+       struct aoetgt *t = f->t;
+       struct aoe_hdr *h;
+       struct aoe_atahdr *ah;
+       int cmd;
+
+       h = (struct aoe_hdr *) skb_mac_header(skb);
+       ah = (struct aoe_atahdr *) (h + 1);
+       skb_put(skb, sizeof(*h) + sizeof(*ah));
+       memset(h, 0, skb->len);
+
+       f->tag = aoehdr_atainit(t->d, t, h);
+       fhash(f);
+       t->nout++;
+       f->waited = 0;
+       f->waited_total = 0;
+       if (f->buf)
+               f->lba = f->buf->sector;
+
+       /* set up ata header; cmd collects the command bits as we go */
+       cmd = ATA_CMD_PIO_READ;
+       ah->scnt = f->bcnt >> 9;
+       put_lba(ah, f->lba);
+       if (t->d->flags & DEVFL_EXT) {
+               ah->aflags |= AOEAFL_EXT;
+               cmd |= 0x4;             /* ext bit */
+       } else {
+               ah->lba3 &= 0x0f;
+               ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
+       }
+
+       if (!f->buf || bio_data_dir(f->buf->bio) != WRITE) {
+               t->rpkts++;
+       } else {
+               skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+               ah->aflags |= AOEAFL_WRITE;
+               skb->len += f->bcnt;
+               skb->data_len = f->bcnt;
+               skb->truesize += f->bcnt;
+               t->wpkts++;
+               cmd |= 0x10;            /* write bit */
+       }
+
+       ah->cmdstat = cmd;
+       skb->dev = t->ifp->nd;
+}
+
 static int
 aoecmd_ata_rw(struct aoedev *d)
 {
        struct frame *f;
-       struct aoe_hdr *h;
-       struct aoe_atahdr *ah;
        struct buf *buf;
        struct aoetgt *t;
        struct sk_buff *skb;
        struct sk_buff_head queue;
        ulong bcnt, fbcnt;
-       char writebit, extbit;
-
-       writebit = 0x10;
-       extbit = 0x4;
 
        buf = nextbuf(d);
        if (buf == NULL)
        } while (fbcnt);
 
        /* initialize the headers & frame */
-       skb = f->skb;
-       h = (struct aoe_hdr *) skb_mac_header(skb);
-       ah = (struct aoe_atahdr *) (h+1);
-       skb_put(skb, sizeof *h + sizeof *ah);
-       memset(h, 0, skb->len);
-       f->tag = aoehdr_atainit(d, t, h);
-       fhash(f);
-       t->nout++;
-       f->waited = 0;
-       f->waited_total = 0;
        f->buf = buf;
        f->bcnt = bcnt;
-       f->lba = buf->sector;
-
-       /* set up ata header */
-       ah->scnt = bcnt >> 9;
-       put_lba(ah, buf->sector);
-       if (d->flags & DEVFL_EXT) {
-               ah->aflags |= AOEAFL_EXT;
-       } else {
-               extbit = 0;
-               ah->lba3 &= 0x0f;
-               ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
-       }
-       if (bio_data_dir(buf->bio) == WRITE) {
-               skb_fillup(skb, f->bv, f->bv_off, bcnt);
-               ah->aflags |= AOEAFL_WRITE;
-               skb->len += bcnt;
-               skb->data_len = bcnt;
-               skb->truesize += bcnt;
-               t->wpkts++;
-       } else {
-               t->rpkts++;
-               writebit = 0;
-       }
-
-       ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+       ata_rw_frameinit(f);
 
        /* mark all tracking fields and load out */
        buf->nframesout += 1;
        buf->sector += bcnt >> 9;
 
-       skb->dev = t->ifp->nd;
-       skb = skb_clone(skb, GFP_ATOMIC);
+       skb = skb_clone(f->skb, GFP_ATOMIC);
        if (skb) {
                do_gettimeofday(&f->sent);
                f->sent_jiffs = (u32) jiffies;
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h+1);
 
-       snprintf(buf, sizeof buf,
-               "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
-               "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
-               h->src, h->dst, t->nout);
-       aoechr_error(buf);
+       if (!(f->flags & FFL_PROBE)) {
+               snprintf(buf, sizeof(buf),
+                       "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+                       "retransmit", d->aoemajor, d->aoeminor,
+                       f->tag, jiffies, n,
+                       h->src, h->dst, t->nout);
+               aoechr_error(buf);
+       }
 
        f->tag = n;
        fhash(f);
 }
 
 static struct frame *
-reassign_frame(struct list_head *pos)
+reassign_frame(struct frame *f)        /* returns a frame on another target carrying f's skb, or NULL */
 {
-       struct frame *f;
        struct frame *nf;
        struct sk_buff *skb;
 
-       f = list_entry(pos, struct frame, head);
        nf = newframe(f->t->d);
        if (!nf)
                return NULL;
-
-       list_del(pos);
+       if (nf->t == f->t) {            /* new frame landed on f's own target: give it back */
+               aoe_freetframe(nf);
+               return NULL;
+       }
 
        skb = nf->skb;
        nf->skb = f->skb;
        nf->sent = f->sent;
        nf->sent_jiffs = f->sent_jiffs;
        f->skb = skb;
-       aoe_freetframe(f);
-       f->t->nout--;
-       nf->t->nout++;
 
        return nf;
 }
 
-static int
-sthtith(struct aoedev *d)
+static void
+probe(struct aoetgt *t)                /* build and transmit one FFL_PROBE frame on target t */
 {
-       struct frame *f, *nf;
-       struct list_head *nx, *pos, *head;
-       struct aoetgt *ht = d->htgt;
-       int i;
+       struct aoedev *d;
+       struct frame *f;
+       struct sk_buff *skb;
+       struct sk_buff_head queue;
+       size_t n, m;
+       int frag;
 
-       /* look through the active and pending retransmit frames */
-       for (i = 0; i < NFACTIVE; i++) {
-               head = &d->factive[i];
-               list_for_each_safe(pos, nx, head) {
-                       f = list_entry(pos, struct frame, head);
-                       if (f->t != ht)
-                               continue;
-                       nf = reassign_frame(pos);
-                       if (!nf)
-                               return 0;
-                       resend(d, nf);
-               }
+       d = t->d;
+       f = newtframe(d, t);
+       if (!f) {                       /* no frame: log and give up on this probe */
+               pr_err("%s %pm for e%ld.%d: %s\n",
+                       "aoe: cannot probe remote address",
+                       t->addr,
+                       (long) d->aoemajor, d->aoeminor,
+                       "no frame available");
+               return;
        }
-       head = &d->rexmitq;
-       list_for_each_safe(pos, nx, head) {
-               f = list_entry(pos, struct frame, head);
-               if (f->t != ht)
-                       continue;
-               nf = reassign_frame(pos);
-               if (!nf)
-                       return 0;
-               resend(d, nf);
+       f->flags |= FFL_PROBE;          /* tag the frame so other paths can tell probes apart */
+       ifrotate(t);
+       f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;   /* maxbcnt of 0 falls back to DEFAULTBCNT */
+       ata_rw_frameinit(f);
+       skb = f->skb;
+       for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {     /* cover bcnt bytes with frags that all point at empty_page */
+               if (n < PAGE_SIZE)
+                       m = n;
+               else
+                       m = PAGE_SIZE;
+               skb_fill_page_desc(skb, frag, empty_page, 0, m);
        }
-       /* We've cleaned up the outstanding so take away his
-        * interfaces so he won't be used.  We should remove him from
-        * the target array here, but cleaning up a target is
-        * involved.  PUNT!
-        */
-       memset(ht->ifs, 0, sizeof ht->ifs);
-       d->htgt = NULL;
-       return 1;
+       skb->len += f->bcnt;            /* account for the attached frags in the skb's length fields */
+       skb->data_len = f->bcnt;
+       skb->truesize += f->bcnt;
+
+       skb = skb_clone(f->skb, GFP_ATOMIC);    /* send a clone; the frame keeps f->skb */
+       if (skb) {
+               do_gettimeofday(&f->sent);
+               f->sent_jiffs = (u32) jiffies;
+               __skb_queue_head_init(&queue);
+               __skb_queue_tail(&queue, skb);
+               aoenet_xmit(&queue);
+       }
+}
+
+/*
+ * Retransmit timeout for device d, based on observed timings and
+ * variations: (2 * rttavg >> RTTSCALE) + (8 * rttdev >> RTTDSCALE).
+ * A result of zero is reported as 1, so the return value is never 0.
+ */
+static long
+rto(struct aoedev *d)
+{
+       long timeout;
+
+       timeout = (2 * d->rttavg) >> RTTSCALE;
+       timeout += (8 * d->rttdev) >> RTTDSCALE;
+
+       return timeout ? timeout : 1;
 }
 
 static void
 {
        struct aoetgt *t;
        struct frame *f;
+       struct frame *nf;
        struct list_head *pos, *nx, *head;
        int since;
+       int untainted;
+
+       count_targets(d, &untainted);
 
        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head) {
                f = list_entry(pos, struct frame, head);
                t = f->t;
+               if (t->taint) {
+                       if (!(f->flags & FFL_PROBE)) {
+                               nf = reassign_frame(f);
+                               if (nf) {
+                                       if (t->nout_probes == 0
+                                       && untainted > 0) {
+                                               probe(t);
+                                               t->nout_probes++;
+                                       }
+                                       list_replace(&f->head, &nf->head);
+                                       pos = &nf->head;
+                                       aoe_freetframe(f);
+                                       f = nf;
+                                       t = f->t;
+                               }
+                       } else if (untainted < 1) {
+                               /* don't probe w/o other untainted aoetgts */
+                               goto stop_probe;
+                       } else if (tsince_hr(f) < t->taint * rto(d)) {
+                               /* reprobe slowly when taint is high */
+                               continue;
+                       }
+               } else if (f->flags & FFL_PROBE) {
+stop_probe:            /* don't probe untainted aoetgts */
+                       list_del(pos);
+                       aoe_freetframe(f);
+                       /* leaving d->kicked, because this is routine */
+                       f->t->d->flags |= DEVFL_KICKME;
+                       continue;
+               }
                if (t->nout >= t->maxout)
                        continue;
                list_del(pos);
                t->nout++;
+               if (f->flags & FFL_PROBE)
+                       t->nout_probes++;
                since = tsince_hr(f);
                f->waited += since;
                f->waited_total += since;
        }
 }
 
+/* An aoetgt accumulates demerits quickly, and successful
+ * probing redeems the aoetgt slowly.
+ *
+ * Each call adds one to t->taint and then triples it; the result
+ * is capped at MAX_TAINT.
+ */
+static void
+scorn(struct aoetgt *t)
+{
+       int before = t->taint;
+
+       t->taint++;
+       t->taint += 2 * t->taint;
+       if (t->taint < before)  /* presumably a wraparound guard: keep old value */
+               t->taint = before;
+       if (t->taint > MAX_TAINT)
+               t->taint = MAX_TAINT;
+}
+
+/*
+ * Walk d->targets from slot 0 until d->ntargets entries have been seen
+ * or a NULL slot is reached.  Returns the number of entries walked.
+ * If untainted is non-NULL, it receives the number of walked entries
+ * whose taint is zero.
+ */
+static int
+count_targets(struct aoedev *d, int *untainted)
+{
+       int total = 0;
+       int clean = 0;
+
+       while (total < d->ntargets && d->targets[total]) {
+               if (d->targets[total]->taint == 0)
+                       clean++;
+               total++;
+       }
+
+       if (untainted)
+               *untainted = clean;
+       return total;
+}
+
 static void
 rexmit_timer(ulong vp)
 {
        register long timeout;
        ulong flags, n;
        int i;
+       int utgts;      /* number of aoetgt descriptors (not slots) */
        int since;
 
        d = (struct aoedev *) vp;
        spin_lock_irqsave(&d->lock, flags);
 
        /* timeout based on observed timings and variations */
-       timeout = 2 * d->rttavg >> RTTSCALE;
-       timeout += 8 * d->rttdev >> RTTDSCALE;
-       if (timeout == 0)
-               timeout = 1;
+       timeout = rto(d);
+
+       utgts = count_targets(d, NULL);
 
        if (d->flags & DEVFL_TKILL) {
                spin_unlock_irqrestore(&d->lock, flags);
                since = tsince_hr(f);
                n = f->waited_total + since;
                n /= USEC_PER_SEC;
-               if (n > aoe_deadsecs) {
+               if (n > aoe_deadsecs && !(f->flags & FFL_PROBE)) {
                        /* Waited too long.  Device failure.
                         * Hang all frames on first hash bucket for downdev
                         * to clean up.
                }
 
                t = f->t;
-               if (n > aoe_deadsecs/2)
-                       d->htgt = t; /* see if another target can help */
+               n = f->waited + since;
+               n /= USEC_PER_SEC;
+               if (aoe_deadsecs && utgts > 0
+               && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
+                       scorn(t); /* avoid this target */
 
                if (t->maxout != 1) {
                        t->ssthresh = t->maxout / 2;
                        t->maxout = 1;
                }
 
-               ifp = getif(t, f->skb->dev);
-               if (ifp && ++ifp->lost > (t->nframes << 1)
-               && (ifp != t->ifs || t->ifs[1].nd)) {
-                       ejectif(t, ifp);
-                       ifp = NULL;
+               if (f->flags & FFL_PROBE) {
+                       t->nout_probes--;
+               } else {
+                       ifp = getif(t, f->skb->dev);
+                       if (ifp && ++ifp->lost > (t->nframes << 1)
+                       && (ifp != t->ifs || t->ifs[1].nd)) {
+                               ejectif(t, ifp);
+                               ifp = NULL;
+                       }
                }
                list_move_tail(pos, &d->rexmitq);
                t->nout--;
        rexmit_deferred(d);
 
 out:
-       if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
+       if ((d->flags & DEVFL_KICKME) && d->blkq) {
                d->flags &= ~DEVFL_KICKME;
                d->blkq->request_fn(d->blkq);
        }
 void
 aoecmd_work(struct aoedev *d)
 {
-       if (d->htgt && !sthtith(d))
-               return;
        rexmit_deferred(d);
        while (aoecmd_ata_rw(d))
                ;
        struct aoeif *ifp;
        struct aoedev *d;
        long n;
+       int untainted;
 
        if (f == NULL)
                return;
 
        t = f->t;
        d = t->d;
+       skb = f->r_skb;
+       buf = f->buf;
+       if (f->flags & FFL_PROBE)
+               goto out;
+       if (!skb)               /* just fail the buf. */
+               goto noskb;
 
        hout = (struct aoe_hdr *) skb_mac_header(f->skb);
        ahout = (struct aoe_atahdr *) (hout+1);
-       buf = f->buf;
-       skb = f->r_skb;
-       if (skb == NULL)
-               goto noskb;     /* just fail the buf. */
 
        hin = (struct aoe_hdr *) skb->data;
        skb_pull(skb, sizeof(*hin));
                        d->aoemajor, d->aoeminor);
 noskb:         if (buf)
                        clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
-               goto badrsp;
+               goto out;
        }
 
        n = ahout->scnt << 9;
                ifp = getif(t, skb->dev);
                if (ifp)
                        ifp->lost = 0;
-               if (d->htgt == t) /* I'll help myself, thank you. */
-                       d->htgt = NULL;
                spin_unlock_irq(&d->lock);
                break;
        case ATA_CMD_ID_ATA:
                        be16_to_cpu(get_unaligned(&hin->major)),
                        hin->minor);
        }
-badrsp:
+out:
        spin_lock_irq(&d->lock);
+       if (t->taint > 0
+       && --t->taint > 0
+       && t->nout_probes == 0) {
+               count_targets(d, &untainted);
+               if (untainted > 0) {
+                       probe(t);
+                       t->nout_probes++;
+               }
+       }
 
        aoe_freetframe(f);
 
        if (f) {
                calc_rttavg(d, f->t, tsince_hr(f));
                f->t->nout--;
+               if (f->flags & FFL_PROBE)
+                       f->t->nout_probes--;
        } else {
                f = getframe_deferred(d, n);
                if (f) {
        memcpy(t->addr, addr, sizeof t->addr);
        t->ifp = t->ifs;
        aoecmd_wreset(t);
+       t->maxout = t->nframes / 2;
        INIT_LIST_HEAD(&t->ffree);
        return *tt = t;
 }
 int __init
 aoecmd_init(void)
 {
+       void *p;
+
+       /* get_zeroed_page returns page with ref count 1 */
+       p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+       if (!p)
+               return -ENOMEM;
+       empty_page = virt_to_page(p);
+
        INIT_LIST_HEAD(&iocq.head);
        spin_lock_init(&iocq.lock);
        init_waitqueue_head(&ktiowq);
 {
        aoe_ktstop(&kts);
        aoe_flush_iocq();
+
+       free_page((unsigned long) page_address(empty_page));
+       empty_page = NULL;
 }