From: Benjamin Berg Date: Wed, 3 Jul 2024 09:58:57 +0000 (+0300) Subject: wifi: iwlwifi: release TXQ lock during reclaim X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=a2ed933dfefac8976ac915e1b1b60028b9c344b3;p=users%2Fwilly%2Fxarray.git wifi: iwlwifi: release TXQ lock during reclaim Much of the work during reclaim can be done without holding the TXQ lock and releasing the lock means that command submission can happen at the same time. Add a new reclaim_lock to prevent parallel cleanup. Release the lock while working with an internal copy of the txq->read_ptr and only take the lock again when updating the read pointer after the cleanup is done. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240703125541.2a81021d49ac.I53698ae92fb75a0461d41176db115462cf8be1cd@changeid Signed-off-by: Johannes Berg --- diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 015f02122df6..6148acbac6af 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -752,6 +752,7 @@ struct iwl_pcie_first_tb_buf { * @first_tb_dma: DMA address for the first_tb_bufs start * @entries: transmit entries (driver state) * @lock: queue lock + * @reclaim_lock: reclaim lock * @stuck_timer: timer that fires if queue gets stuck * @trans: pointer back to transport (for timer) * @need_update: indicates need to update read/write index @@ -794,6 +795,8 @@ struct iwl_txq { struct iwl_pcie_txq_entry *entries; /* lock for syncing changes on the queue */ spinlock_t lock; + /* lock to prevent concurrent reclaim */ + spinlock_t reclaim_lock; unsigned long frozen_expiry_remainder; struct timer_list stuck_timer; struct iwl_trans *trans; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 21c3998a76c8..2e780fb2da42 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -821,7 +821,8 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id) struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id]; - spin_lock_bh(&txq->lock); + spin_lock_bh(&txq->reclaim_lock); + spin_lock(&txq->lock); while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", txq_id, txq->read_ptr); @@ -844,7 +845,8 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id) iwl_op_mode_free_skb(trans->op_mode, skb); } - spin_unlock_bh(&txq->lock); + spin_unlock(&txq->lock); + spin_unlock_bh(&txq->reclaim_lock); /* just in case - this queue may have been stopped */ iwl_trans_pcie_wake_queue(trans, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 1f6db6b90f6f..748772fa6b3e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -333,20 +333,21 @@ static void iwl_txq_gen1_tfd_unmap(struct iwl_trans *trans, * iwl_txq_free_tfd - Free all chunks referenced by TFD [txq->q.read_ptr] * @trans: transport private data * @txq: tx queue + * @read_ptr: the TXQ read_ptr to free * * Does NOT advance any TFD circular buffer read/write indexes * Does NOT free the TFD itself (which is within circular buffer) */ -static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) +static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq, + int read_ptr) { /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ - int rd_ptr = txq->read_ptr; - int idx = iwl_txq_get_cmd_index(txq, rd_ptr); + int idx = iwl_txq_get_cmd_index(txq, read_ptr); struct sk_buff *skb; - lockdep_assert_held(&txq->lock); + lockdep_assert_held(&txq->reclaim_lock); if (!txq->entries) return; @@ -356,10 +357,10 @@ static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) */ if (trans->trans_cfg->gen2) iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta, - iwl_txq_get_tfd(trans, txq, rd_ptr)); + iwl_txq_get_tfd(trans, txq, read_ptr)); else iwl_txq_gen1_tfd_unmap(trans, &txq->entries[idx].meta, - txq, rd_ptr); + txq, read_ptr); /* free SKB */ skb = txq->entries[idx].skb; @@ -387,7 +388,8 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) return; } - spin_lock_bh(&txq->lock); + spin_lock_bh(&txq->reclaim_lock); + spin_lock(&txq->lock); while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", txq_id, txq->read_ptr); @@ -402,7 +404,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) iwl_pcie_free_tso_pages(trans, skb, cmd_meta); } - iwl_txq_free_tfd(trans, txq); + iwl_txq_free_tfd(trans, txq, txq->read_ptr); txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); if (txq->read_ptr == txq->write_ptr && @@ -416,7 +418,8 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) iwl_op_mode_free_skb(trans->op_mode, skb); } - spin_unlock_bh(&txq->lock); + spin_unlock(&txq->lock); + spin_unlock_bh(&txq->reclaim_lock); /* just in case - this queue may have been stopped */ iwl_trans_pcie_wake_queue(trans, txq); @@ -921,6 +924,7 @@ int iwl_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, return ret; spin_lock_init(&txq->lock); + spin_lock_init(&txq->reclaim_lock); if (cmd_queue) { static struct lock_class_key iwl_txq_cmd_queue_lock_class; @@ -1055,11 +1059,12 @@ static void iwl_txq_progress(struct iwl_txq *txq) mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); } -static inline bool iwl_txq_used(const struct iwl_txq *q, int i) +static inline bool iwl_txq_used(const struct iwl_txq *q, int i, + int read_ptr, int write_ptr) { int index = iwl_txq_get_cmd_index(q, i); - int r = iwl_txq_get_cmd_index(q, q->read_ptr); - int w = iwl_txq_get_cmd_index(q, q->write_ptr); + int r = iwl_txq_get_cmd_index(q, read_ptr); + int w = iwl_txq_get_cmd_index(q, write_ptr); return w >= r ? (index >= r && index < w) : @@ -1086,7 +1091,7 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx) r = iwl_txq_get_cmd_index(txq, txq->read_ptr); if (idx >= trans->trans_cfg->base_params->max_tfd_queue_size || - (!iwl_txq_used(txq, idx))) { + (!iwl_txq_used(txq, idx, txq->read_ptr, txq->write_ptr))) { WARN_ONCE(test_bit(txq_id, trans_pcie->txqs.queue_used), "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n", __func__, txq_id, idx, @@ -2284,12 +2289,12 @@ out_err: } static void iwl_txq_gen1_inval_byte_cnt_tbl(struct iwl_trans *trans, - struct iwl_txq *txq) + struct iwl_txq *txq, + int read_ptr) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->txqs.scd_bc_tbls.addr; int txq_id = txq->id; - int read_ptr = txq->read_ptr; u8 sta_id = 0; __le16 bc_ent; struct iwl_device_tx_cmd *dev_cmd = txq->entries[read_ptr].cmd; @@ -2316,6 +2321,7 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id]; int tfd_num, read_ptr, last_to_free; + int txq_read_ptr, txq_write_ptr; /* This function is not meant to release cmd queue*/ if (WARN_ON(txq_id == trans_pcie->txqs.cmd.q_id)) @@ -2326,8 +2332,14 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, tfd_num = iwl_txq_get_cmd_index(txq, ssn); - spin_lock_bh(&txq->lock); - read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr); + spin_lock_bh(&txq->reclaim_lock); + + spin_lock(&txq->lock); + txq_read_ptr = txq->read_ptr; + txq_write_ptr = txq->write_ptr; + spin_unlock(&txq->lock); + + read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr); if (!test_bit(txq_id, trans_pcie->txqs.queue_used)) { IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n", @@ -2339,19 +2351,19 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d (%d) -> %d (%d)\n", - txq_id, read_ptr, txq->read_ptr, tfd_num, ssn); + txq_id, read_ptr, txq_read_ptr, tfd_num, ssn); /* Since we free until index _not_ inclusive, the one before index is * the last we will free. This one must be used */ last_to_free = iwl_txq_dec_wrap(trans, tfd_num); - if (!iwl_txq_used(txq, last_to_free)) { + if (!iwl_txq_used(txq, last_to_free, txq_read_ptr, txq_write_ptr)) { IWL_ERR(trans, "%s: Read index for txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n", __func__, txq_id, last_to_free, trans->trans_cfg->base_params->max_tfd_queue_size, - txq->write_ptr, txq->read_ptr); + txq_write_ptr, txq_read_ptr); iwl_op_mode_time_point(trans->op_mode, IWL_FW_INI_TIME_POINT_FAKE_TX, @@ -2364,13 +2376,13 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, for (; read_ptr != tfd_num; - txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr), - read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr)) { + txq_read_ptr = iwl_txq_inc_wrap(trans, txq_read_ptr), + read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr)) { struct iwl_cmd_meta *cmd_meta = &txq->entries[read_ptr].meta; struct sk_buff *skb = txq->entries[read_ptr].skb; if (WARN_ONCE(!skb, "no SKB at %d (%d) on queue %d\n", - read_ptr, txq->read_ptr, txq_id)) + read_ptr, txq_read_ptr, txq_id)) continue; iwl_pcie_free_tso_pages(trans, skb, cmd_meta); @@ -2380,11 +2392,15 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, txq->entries[read_ptr].skb = NULL; if (!trans->trans_cfg->gen2) - iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq); + iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq, + txq_read_ptr); - iwl_txq_free_tfd(trans, txq); + iwl_txq_free_tfd(trans, txq, txq_read_ptr); } + spin_lock(&txq->lock); + txq->read_ptr = txq_read_ptr; + iwl_txq_progress(txq); if (iwl_txq_space(trans, txq) > txq->low_mark && @@ -2406,11 +2422,10 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, txq->overflow_tx = true; /* - * This is tricky: we are in reclaim path which is non - * re-entrant, so noone will try to take the access the - * txq data from that path. We stopped tx, so we can't - * have tx as well. Bottom line, we can unlock and re-lock - * later. + * This is tricky: we are in reclaim path and are holding + * reclaim_lock, so noone will try to access the txq data + * from that path. We stopped tx, so we can't have tx as well. + * Bottom line, we can unlock and re-lock later. */ spin_unlock(&txq->lock); @@ -2435,8 +2450,9 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, txq->overflow_tx = false; } + spin_unlock(&txq->lock); out: - spin_unlock_bh(&txq->lock); + spin_unlock_bh(&txq->reclaim_lock); } /* Set wr_ptr of specific device and txq */