Currently if a user enqueue a work item using schedule_delayed_work() the
used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work() that is using system_wq and queue_work(), that makes use
again of WORK_CPU_UNBOUND.
This lack of consistentcy cannot be addressed without refactoring the API.
alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt-in via WQ_UNBOUND.
This default is suboptimal: most workloads benefit from unbound queues,
allowing the scheduler to place worker threads where they’re needed and
reducing noise when CPUs are isolated.
This change adds a new WQ_PERCPU flag at the network subsystem, to explicitly
request the use of the per-CPU behavior. Both flags coexist for one release
cycle to allow callers to transition their calls.
Once migration is complete, WQ_UNBOUND can be removed and unbound will
become the implicit default.
With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.
All existing users have been updated accordingly.
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://patch.msgid.link/20250918142427.309519-4-marco.crivellari@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
goto out_close;
}
- priv->wq = alloc_workqueue("hi3110_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ priv->wq = alloc_workqueue("hi3110_wq",
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!priv->wq) {
ret = -ENOMEM;
if (ret)
goto out_clk;
- priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ priv->wq = alloc_workqueue("mcp251x_wq",
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!priv->wq) {
ret = -ENOMEM;
q_no = lio->linfo.rxpciq[q].s.q_no;
wq = &lio->rxq_status_wq[q_no];
wq->wq = alloc_workqueue("rxq-oom-status",
- WQ_MEM_RECLAIM, 0);
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!wq->wq) {
dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n");
return -ENOMEM;
struct octeon_device *oct = lio->oct_dev;
lio->link_status_wq.wq = alloc_workqueue("link-status",
- WQ_MEM_RECLAIM, 0);
+ WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!lio->link_status_wq.wq) {
dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
return -1;
struct octeon_device *oct = lio->oct_dev;
lio->sync_octeon_time_wq.wq =
- alloc_workqueue("update-octeon-time", WQ_MEM_RECLAIM, 0);
+ alloc_workqueue("update-octeon-time",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!lio->sync_octeon_time_wq.wq) {
dev_err(&oct->pci_dev->dev, "Unable to create wq to update octeon time\n");
return -1;
struct octeon_device *oct = lio->oct_dev;
lio->txq_status_wq.wq = alloc_workqueue("txq-status",
- WQ_MEM_RECLAIM, 0);
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!lio->txq_status_wq.wq) {
dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n");
return -1;
struct octeon_device *oct = lio->oct_dev;
lio->link_status_wq.wq = alloc_workqueue("link-status",
- WQ_MEM_RECLAIM, 0);
+ WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!lio->link_status_wq.wq) {
dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
return -1;
oct->fn_list.setup_iq_regs(oct, iq_no);
oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
- WQ_MEM_RECLAIM,
+ WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!oct->check_db_wq[iq_no].wq) {
vfree(iq->request_list);
}
spin_lock_init(&oct->cmd_resp_wqlock);
- oct->dma_comp_wq.wq = alloc_workqueue("dma-comp", WQ_MEM_RECLAIM, 0);
+ oct->dma_comp_wq.wq = alloc_workqueue("dma-comp",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!oct->dma_comp_wq.wq) {
dev_err(&oct->pci_dev->dev, "failed to create wq thread\n");
return -ENOMEM;
priv->tx_tstamp_type = HWTSTAMP_TX_OFF;
priv->rx_tstamp = false;
- priv->dpaa2_ptp_wq = alloc_workqueue("dpaa2_ptp_wq", 0, 0);
+ priv->dpaa2_ptp_wq = alloc_workqueue("dpaa2_ptp_wq", WQ_PERCPU, 0);
if (!priv->dpaa2_ptp_wq) {
err = -ENOMEM;
goto err_wq_alloc;
{
pr_debug("%s is initializing\n", HCLGE_NAME);
- hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME);
+ hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0,
+ HCLGE_NAME);
if (!hclge_wq) {
pr_err("%s: failed to create workqueue\n", HCLGE_NAME);
return -ENOMEM;
pr_info("%s\n", fm10k_copyright);
/* create driver workqueue */
- fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+ fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 0,
fm10k_driver_name);
if (!fm10k_workqueue)
return -ENOMEM;
* since we need to be able to guarantee forward progress even under
* memory pressure.
*/
- i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name);
+ i40e_wq = alloc_workqueue("%s", WQ_PERCPU, 0, i40e_driver_name);
if (!i40e_wq) {
pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
return -ENOMEM;
/* init wq for processing linkup requests */
INIT_WORK(&cgx->cgx_cmd_work, cgx_lmac_linkup_work);
- cgx->cgx_cmd_workq = alloc_workqueue("cgx_cmd_workq", 0, 0);
+ cgx->cgx_cmd_workq = alloc_workqueue("cgx_cmd_workq", WQ_PERCPU, 0);
if (!cgx->cgx_cmd_workq) {
dev_err(dev, "alloc workqueue failed for cgx cmd");
err = -ENOMEM;
/* Initialize the wq for handling mcs interrupts */
INIT_LIST_HEAD(&rvu->mcs_intrq_head);
INIT_WORK(&rvu->mcs_intr_work, mcs_intr_handler_task);
- rvu->mcs_intr_wq = alloc_workqueue("mcs_intr_wq", 0, 0);
+ rvu->mcs_intr_wq = alloc_workqueue("mcs_intr_wq", WQ_PERCPU, 0);
if (!rvu->mcs_intr_wq) {
dev_err(rvu->dev, "mcs alloc workqueue failed\n");
return -ENOMEM;
spin_lock_init(&rvu->cgx_evq_lock);
INIT_LIST_HEAD(&rvu->cgx_evq_head);
INIT_WORK(&rvu->cgx_evh_work, cgx_evhandler_task);
- rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", 0, 0);
+ rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", WQ_PERCPU, 0);
if (!rvu->cgx_evh_wq) {
dev_err(rvu->dev, "alloc workqueue failed");
return -ENOMEM;
spin_lock_init(&rvu->rep_evtq_lock);
INIT_LIST_HEAD(&rvu->rep_evtq_head);
INIT_WORK(&rvu->rep_evt_work, rvu_rep_wq_handler);
- rvu->rep_evt_wq = alloc_workqueue("rep_evt_wq", 0, 0);
+ rvu->rep_evt_wq = alloc_workqueue("rep_evt_wq", WQ_PERCPU, 0);
if (!rvu->rep_evt_wq) {
dev_err(rvu->dev, "REP workqueue allocation failed\n");
return -ENOMEM;
pf->ipsec.sa_size = sa_size;
INIT_WORK(&pf->ipsec.sa_work, cn10k_ipsec_sa_wq_handler);
- pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq", 0, 0);
+ pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq",
+ WQ_PERCPU, 0);
if (!pf->ipsec.sa_workq) {
netdev_err(pf->netdev, "SA alloc workqueue failed\n");
return -ENOMEM;
static int __init prestera_module_init(void)
{
- prestera_wq = alloc_workqueue("prestera", 0, 0);
+ prestera_wq = alloc_workqueue("prestera", WQ_PERCPU, 0);
if (!prestera_wq)
return -ENOMEM;
dev_info(fw->dev.dev, "Prestera FW is ready\n");
- fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1);
+ fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI | WQ_PERCPU, 1);
if (!fw->wq) {
err = -ENOMEM;
goto err_wq_alloc;
if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
return 0;
- emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
+ emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_PERCPU, 0);
if (!emad_wq)
return -ENOMEM;
mlxsw_core->emad_wq = emad_wq;
if (err)
return err;
- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
+ mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_PERCPU, 0);
if (!mlxsw_wq) {
err = -ENOMEM;
goto err_alloc_workqueue;
pf->pdev = pdev;
pf->dev_info = dev_info;
- pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev));
+ pf->wq = alloc_workqueue("nfp-%s", WQ_PERCPU, 2, pci_name(pdev));
if (!pf->wq) {
err = -ENOMEM;
goto err_pci_priv_unset;
hwfn = &cdev->hwfns[i];
hwfn->slowpath_wq = alloc_workqueue("slowpath-%02x:%02x.%02x",
- 0, 0, cdev->pdev->bus->number,
+ WQ_PERCPU, 0,
+ cdev->pdev->bus->number,
PCI_SLOT(cdev->pdev->devfn),
hwfn->abs_pf_id);
if (err < 0)
goto err_register;
- priv->xfer_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+ priv->xfer_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 0,
netdev_name(ndev));
if (!priv->xfer_wq) {
err = -ENOMEM;
adapter->force_reset = false;
adapter->open_guard = false;
- adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0);
+ adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (unlikely(!adapter->txrx_wq)) {
err = -ENOMEM;
goto err_free_netdev;
}
adapter->control_wq = alloc_workqueue(DRV_NAME "/control",
- WQ_MEM_RECLAIM, 0);
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (unlikely(!adapter->control_wq)) {
err = -ENOMEM;
goto err_free_txrx_wq;
goto err_free_peer_hashtable;
wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
- WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE | WQ_PERCPU, 0,
+ dev->name);
if (!wg->handshake_receive_wq)
goto err_free_index_hashtable;
goto err_destroy_handshake_receive;
wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_PERCPU, 0,
+ dev->name);
if (!wg->packet_crypt_wq)
goto err_destroy_handshake_send;
int t7xx_dpmaif_bat_rel_wq_alloc(struct dpmaif_ctrl *dpmaif_ctrl)
{
dpmaif_ctrl->bat_release_wq = alloc_workqueue("dpmaif_bat_release_work_queue",
- WQ_MEM_RECLAIM, 1);
+ WQ_MEM_RECLAIM | WQ_PERCPU,
+ 1);
if (!dpmaif_ctrl->bat_release_wq)
return -ENOMEM;
if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128)
return -EINVAL;
- wwan_wq = alloc_workqueue("wwan_wq", 0, 0);
+ wwan_wq = alloc_workqueue("wwan_wq", WQ_PERCPU, 0);
if (!wwan_wq)
return -ENOMEM;
* The number of active work items is limited by the number of
* connections, so leave @max_active at default.
*/
- ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0);
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (ceph_msgr_wq)
return 0;
static int __init sock_diag_init(void)
{
- broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
+ broadcast_wq = alloc_workqueue("sock_diag_events", WQ_PERCPU, 0);
BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}
int rds_ib_mr_init(void)
{
- rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0);
+ rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!rds_ib_mr_wq)
return -ENOMEM;
return 0;
pr_info("Server registering\n");
- rxperf_workqueue = alloc_workqueue("rxperf", 0, 0);
+ rxperf_workqueue = alloc_workqueue("rxperf", WQ_PERCPU, 0);
if (!rxperf_workqueue)
goto error_workqueue;
rc = -ENOMEM;
- smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
+ smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", WQ_PERCPU, 0);
if (!smc_tcp_ls_wq)
goto out_pnet;
- smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
+ smc_hs_wq = alloc_workqueue("smc_hs_wq", WQ_PERCPU, 0);
if (!smc_hs_wq)
goto out_alloc_tcp_ls_wq;
- smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
+ smc_close_wq = alloc_workqueue("smc_close_wq", WQ_PERCPU, 0);
if (!smc_close_wq)
goto out_alloc_hs_wq;
rc = SMC_CLC_DECL_MEM;
goto ism_put_vlan;
}
- lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
+ lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", WQ_PERCPU, 0,
SMC_LGR_ID_SIZE, &lgr->id);
if (!lgr->tx_wq) {
rc = -ENOMEM;
if (!dummy_page)
return -ENOMEM;
- destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ destruct_wq = alloc_workqueue("ktls_device_destruct", WQ_PERCPU, 0);
if (!destruct_wq) {
err = -ENOMEM;
goto err_free_dummy;
{
int ret;
- virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", WQ_PERCPU, 0);
if (!virtio_vsock_workqueue)
return -ENOMEM;
struct vsock_loopback *vsock = &the_vsock_loopback;
int ret;
- vsock->workqueue = alloc_workqueue("vsock-loopback", 0, 0);
+ vsock->workqueue = alloc_workqueue("vsock-loopback", WQ_PERCPU, 0);
if (!vsock->workqueue)
return -ENOMEM;