 /* Check if there are enough reserved descriptors for transmission.
  * If not, request a chunk of reserved descriptors and check again.
  */
-static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2 *priv,
+static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2_port *port,
                                            struct mvpp2_tx_queue *txq,
                                            struct mvpp2_txq_pcpu *txq_pcpu,
                                            int num)
 {
+       struct mvpp2 *priv = port->priv;
        int req, desc_count;
-       unsigned int cpu;
+       unsigned int thread;
 
        if (txq_pcpu->reserved_num >= num)
                return 0;
 
        desc_count = 0;
        /* Compute total of used descriptors */
-       for_each_present_cpu(cpu) {
+       for (thread = 0; thread < port->nthreads; thread++) {
                struct mvpp2_txq_pcpu *txq_pcpu_aux;
 
-               txq_pcpu_aux = per_cpu_ptr(txq->pcpu, cpu);
+               txq_pcpu_aux = per_cpu_ptr(txq->pcpu, thread);
                desc_count += txq_pcpu_aux->count;
                desc_count += txq_pcpu_aux->reserved_num;
        }
 
        req = max(MVPP2_CPU_DESC_CHUNK, num - txq_pcpu->reserved_num);
        desc_count += req;
 
        if (desc_count >
-          (txq->size - (num_present_cpus() * MVPP2_CPU_DESC_CHUNK)))
+          (txq->size - (MVPP2_MAX_THREADS * MVPP2_CPU_DESC_CHUNK)))
                return -ENOMEM;
 
        txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(priv, txq, req);
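
A minimal C sketch of the reservation ceiling enforced above, for
reference; the header constants are passed as plain parameters here
because this excerpt does not show their values:

        /* A queue may only hand out more descriptors while every
         * software thread can still reserve one full chunk for itself.
         */
        static int txq_reserve_would_overflow(int used, int reserved,
                                              int req, int txq_size,
                                              int nthreads, int chunk)
        {
                return used + reserved + req > txq_size - nthreads * chunk;
        }
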
        struct netdev_queue *nq = netdev_get_tx_queue(port->dev, txq->log_id);
        int tx_done;
 
-       if (txq_pcpu->cpu != smp_processor_id())
+       if (txq_pcpu->thread != mvpp2_cpu_to_thread(smp_processor_id()))
                netdev_err(port->dev, "wrong cpu on the end of Tx processing\n");
 
        tx_done = mvpp2_txq_sent_desc_proc(port, txq);
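
mvpp2_cpu_to_thread() itself lies outside this excerpt. A hypothetical
sketch consistent with its call sites, assuming a simple modulo fold of
CPU ids onto the software threads (not the driver's actual definition):

        static inline unsigned int mvpp2_cpu_to_thread(int cpu)
        {
                /* Assumed mapping: reuse thread contexts when more CPUs
                 * than software threads are present.
                 */
                return cpu % MVPP2_MAX_THREADS;
        }
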
                          struct mvpp2_tx_queue *txq)
 {
        u32 val;
-       unsigned int cpu, thread;
+       unsigned int thread;
        int desc, desc_per_txq, tx_port_num;
        struct mvpp2_txq_pcpu *txq_pcpu;
 
        mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq->log_id),
                    val);
 
-       for_each_present_cpu(cpu) {
-               txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+       for (thread = 0; thread < port->nthreads; thread++) {
+               txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
                txq_pcpu->size = txq->size;
                txq_pcpu->buffs = kmalloc_array(txq_pcpu->size,
                                                sizeof(*txq_pcpu->buffs),
                             struct mvpp2_tx_queue *txq)
 {
        struct mvpp2_txq_pcpu *txq_pcpu;
-       unsigned int cpu, thread;
+       unsigned int thread;
 
-       for_each_present_cpu(cpu) {
-               txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+       for (thread = 0; thread < port->nthreads; thread++) {
+               txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
                kfree(txq_pcpu->buffs);
 
                if (txq_pcpu->tso_headers)
 {
        struct mvpp2_txq_pcpu *txq_pcpu;
        int delay, pending;
-       unsigned int cpu, thread = mvpp2_cpu_to_thread(get_cpu());
+       unsigned int thread = mvpp2_cpu_to_thread(get_cpu());
        u32 val;
 
        mvpp2_percpu_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
        mvpp2_percpu_write(port->priv, thread, MVPP2_TXQ_PREF_BUF_REG, val);
        put_cpu();
 
-       for_each_present_cpu(cpu) {
-               txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+       for (thread = 0; thread < port->nthreads; thread++) {
+               txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 
                /* Release all packets */
                mvpp2_txq_bufs_free(port, txq, txq_pcpu, txq_pcpu->count);
 {
        struct net_device *dev = (struct net_device *)data;
        struct mvpp2_port *port = netdev_priv(dev);
-       struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
+       struct mvpp2_port_pcpu *port_pcpu;
        unsigned int tx_todo, cause;
 
+       port_pcpu = per_cpu_ptr(port->pcpu,
+                               mvpp2_cpu_to_thread(smp_processor_id()));
+
        if (!netif_running(dev))
                return;
        port_pcpu->timer_scheduled = false;
 
        /* Process all the Tx queues */
        cause = (1 << port->ntxqs) - 1;
-       tx_todo = mvpp2_tx_done(port, cause, smp_processor_id());
+       tx_todo = mvpp2_tx_done(port, cause,
+                               mvpp2_cpu_to_thread(smp_processor_id()));
 
        /* Set the timer in case not all the packets were processed */
        if (tx_todo)
 tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
                  struct mvpp2_tx_desc *desc)
 {
-       struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
+       unsigned int thread = mvpp2_cpu_to_thread(smp_processor_id());
+       struct mvpp2_txq_pcpu *txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 
        dma_addr_t buf_dma_addr =
                mvpp2_txdesc_dma_addr_get(port, desc);
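
This hunk repeats the patch's central substitution. Schematically (a
sketch, assuming preemption is disabled at these call sites so that
smp_processor_id() is stable):

        /* before: per-CPU data addressed implicitly via the running CPU */
        txq_pcpu = this_cpu_ptr(txq->pcpu);

        /* after: the same storage addressed explicitly by s/w thread */
        txq_pcpu = per_cpu_ptr(txq->pcpu,
                               mvpp2_cpu_to_thread(smp_processor_id()));
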
                                 struct mvpp2_tx_queue *aggr_txq,
                                 struct mvpp2_tx_queue *txq)
 {
-       struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
+       unsigned int thread = mvpp2_cpu_to_thread(smp_processor_id());
+       struct mvpp2_txq_pcpu *txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
        struct mvpp2_tx_desc *tx_desc;
        int i;
        dma_addr_t buf_dma_addr;
        /* Check number of available descriptors */
        if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
                                      tso_count_descs(skb)) ||
-           mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
+           mvpp2_txq_reserved_desc_num_proc(port, txq, txq_pcpu,
                                             tso_count_descs(skb)))
                return 0;
 
        struct mvpp2_txq_pcpu *txq_pcpu;
        struct mvpp2_tx_desc *tx_desc;
        dma_addr_t buf_dma_addr;
+       unsigned int thread;
        int frags = 0;
        u16 txq_id;
        u32 tx_cmd;
 
+       thread = mvpp2_cpu_to_thread(smp_processor_id());
+
        txq_id = skb_get_queue_mapping(skb);
        txq = port->txqs[txq_id];
-       txq_pcpu = this_cpu_ptr(txq->pcpu);
-       aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
+       txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
+       aggr_txq = &port->priv->aggr_txqs[thread];
 
        if (skb_is_gso(skb)) {
                frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
 
        /* Check number of available descriptors */
        if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq, frags) ||
-           mvpp2_txq_reserved_desc_num_proc(port->priv, txq,
-                                            txq_pcpu, frags)) {
+           mvpp2_txq_reserved_desc_num_proc(port, txq, txq_pcpu, frags)) {
                frags = 0;
                goto out;
        }
 
 out:
        if (frags > 0) {
-               struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+               struct mvpp2_pcpu_stats *stats = per_cpu_ptr(port->stats, thread);
                struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
                txq_pcpu->reserved_num -= frags;
        /* Set the timer in case not all frags were processed */
        if (!port->has_tx_irqs && txq_pcpu->count <= frags &&
            txq_pcpu->count > 0) {
-               struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
+               struct mvpp2_port_pcpu *port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
                mvpp2_timer_set(port_pcpu);
        }
 {
        struct mvpp2_port *port = netdev_priv(dev);
        struct mvpp2_port_pcpu *port_pcpu;
-       unsigned int cpu;
+       unsigned int thread;
 
        mvpp2_stop_dev(port);
 
 
        mvpp2_irqs_deinit(port);
        if (!port->has_tx_irqs) {
-               for_each_present_cpu(cpu) {
-                       port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+               for (thread = 0; thread < port->nthreads; thread++) {
+                       port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
                        hrtimer_cancel(&port_pcpu->tx_done_timer);
                        port_pcpu->timer_scheduled = false;
        struct device *dev = port->dev->dev.parent;
        struct mvpp2 *priv = port->priv;
        struct mvpp2_txq_pcpu *txq_pcpu;
-       unsigned int cpu;
+       unsigned int thread;
        int queue, err;
 
        /* Checks for hardware constraints */
                txq->id = queue_phy_id;
                txq->log_id = queue;
                txq->done_pkts_coal = MVPP2_TXDONE_COAL_PKTS_THRESH;
-               for_each_present_cpu(cpu) {
-                       txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
-                       txq_pcpu->cpu = cpu;
+               for (thread = 0; thread < port->nthreads; thread++) {
+                       txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
+                       txq_pcpu->thread = thread;
                }
 
                port->txqs[queue] = txq;
        struct resource *res;
        struct phylink *phylink;
        char *mac_from = "";
-       unsigned int ntxqs, nrxqs, cpu;
+       unsigned int ntxqs, nrxqs, thread;
        unsigned long flags = 0;
        bool has_tx_irqs;
        u32 id;
        port->has_tx_irqs = has_tx_irqs;
        port->flags = flags;
 
+       port->nthreads = min_t(unsigned int, num_present_cpus(),
+                              MVPP2_MAX_THREADS);
+
        err = mvpp2_queue_vectors_init(port, port_node);
        if (err)
                goto err_free_netdev;
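
The nthreads clamp above bounds every per-thread loop in this patch. A
worked example, with the MVPP2_MAX_THREADS value assumed purely for
illustration:

        /* Assuming MVPP2_MAX_THREADS == 8:
         *   num_present_cpus() == 4   ->  port->nthreads == 4
         *   num_present_cpus() == 16  ->  port->nthreads == 8
         * Extra CPUs then share thread contexts via mvpp2_cpu_to_thread().
         */
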
        }
 
        if (!port->has_tx_irqs) {
-               for_each_present_cpu(cpu) {
-                       port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+               for (thread = 0; thread < port->nthreads; thread++) {
+                       port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
                        hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
                                     HRTIMER_MODE_REL_PINNED);
        }
 
        /* Allocate and initialize aggregated TXQs */
-       priv->aggr_txqs = devm_kcalloc(&pdev->dev, num_present_cpus(),
+       priv->aggr_txqs = devm_kcalloc(&pdev->dev, MVPP2_MAX_THREADS,
                                       sizeof(*priv->aggr_txqs),
                                       GFP_KERNEL);
        if (!priv->aggr_txqs)
                return -ENOMEM;
 
-       for_each_present_cpu(i) {
+       for (i = 0; i < MVPP2_MAX_THREADS; i++) {
                priv->aggr_txqs[i].id = i;
                priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
                err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
                mvpp2_bm_pool_destroy(pdev, priv, bm_pool);
        }
 
-       for_each_present_cpu(i) {
+       for (i = 0; i < MVPP2_MAX_THREADS; i++) {
                struct mvpp2_tx_queue *aggr_txq = &priv->aggr_txqs[i];
 
                dma_free_coherent(&pdev->dev,