From 1b45443b84872c010b36dfc2681413bb1a10011a Mon Sep 17 00:00:00 2001
From: Jijie Shao <shaojijie@huawei.com>
Date: Sat, 17 May 2025 17:58:28 +0800
Subject: [PATCH 01/16] net: hibmcge: fix wrong ndo.open() after reset fail
 issue.

If the driver reset fails, it may not work properly.
Therefore, the ndo.open() operation should be rejected.

In this patch, the driver calls netif_device_detach()
before the reset and calls netif_device_attach()
after the reset succeeds. If the reset fails,
netif_device_attach() is not called. Therefore,
netdev does not present and cannot be opened.

If reset fails, only the PCI reset (via sysfs)
can be used to attempt recovery.

Fixes: 3f5a61f6d504 ("net: hibmcge: Add reset supported in this module")
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250517095828.1763126-3-shaojijie@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
index a0bcfb5a713d..ff3295b60a69 100644
--- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
+++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
@@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type)
 		return -EBUSY;
 	}
 
+	netif_device_detach(priv->netdev);
+
 	priv->reset_type = type;
 	set_bit(HBG_NIC_STATE_RESETTING, &priv->state);
 	clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state);
@@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type)
 		return ret;
 	}
 
+	netif_device_attach(priv->netdev);
+
 	dev_info(&priv->pdev->dev, "reset done\n");
 	return ret;
 }
@@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv)
 	if (running)
 		dev_close(priv->netdev);
 
-	hbg_reset(priv);
-
-	/* in hbg_pci_err_detected(), we will detach first,
-	 * so we need to attach before open
-	 */
-	if (!netif_device_present(priv->netdev))
-		netif_device_attach(priv->netdev);
+	if (hbg_reset(priv))
+		goto err_unlock;
 
 	if (running)
 		dev_open(priv->netdev, NULL);
+
+err_unlock:
 	rtnl_unlock();
 }
 
@@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev)
 	pci_save_state(pdev);
 
 	hbg_err_reset(priv);
-	netif_device_attach(netdev);
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-- 
2.50.1


From 8d5ac187da10a8a5599b481588951470a2fb792b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 20 May 2025 20:05:45 -0400
Subject: [PATCH 02/16] bcachefs: Fix casefold opt via xattr interface

Changing the casefold option requires extra checks/work - factor out a
helper from bch2_fileattr_set() for the xattr code to use.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs.c    | 26 +-------------------------
 fs/bcachefs/inode.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/bcachefs/inode.h |  4 +++-
 fs/bcachefs/xattr.c |  6 ++++++
 4 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 4b742e62255b..47f1a64c5c8d 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1664,33 +1664,9 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
 		return -EINVAL;
 
 	if (s->casefold != bch2_inode_casefold(c, bi)) {
-#ifdef CONFIG_UNICODE
-		int ret = 0;
-		/* Not supported on individual files. */
-		if (!S_ISDIR(bi->bi_mode))
-			return -EOPNOTSUPP;
-
-		/*
-		 * Make sure the dir is empty, as otherwise we'd need to
-		 * rehash everything and update the dirent keys.
-		 */
-		ret = bch2_empty_dir_trans(trans, inode_inum(inode));
-		if (ret < 0)
-			return ret;
-
-		ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+		int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold);
 		if (ret)
 			return ret;
-
-		bch2_check_set_feature(c, BCH_FEATURE_casefolding);
-
-		bi->bi_casefold = s->casefold + 1;
-		bi->bi_fields_set |= BIT(Inode_opt_casefold);
-
-#else
-		printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
-		return -EOPNOTSUPP;
-#endif
 	}
 
 	if (s->set_project) {
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index b51d98cf8a80..490b85841de9 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -14,6 +14,7 @@
 #include "extent_update.h"
 #include "fs.h"
 #include "inode.h"
+#include "namei.h"
 #include "opts.h"
 #include "str_hash.h"
 #include "snapshot.h"
@@ -1204,6 +1205,41 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i
 	return 0;
 }
 
+int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
+			    struct bch_inode_unpacked *bi, unsigned v)
+{
+	struct bch_fs *c = trans->c;
+
+#ifdef CONFIG_UNICODE
+	int ret = 0;
+	/* Not supported on individual files. */
+	if (!S_ISDIR(bi->bi_mode))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Make sure the dir is empty, as otherwise we'd need to
+	 * rehash everything and update the dirent keys.
+	 */
+	ret = bch2_empty_dir_trans(trans, inum);
+	if (ret < 0)
+		return ret;
+
+	ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+	if (ret)
+		return ret;
+
+	bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+
+	bi->bi_casefold = v + 1;
+	bi->bi_fields_set |= BIT(Inode_opt_casefold);
+
+	return 0;
+#else
+	bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
+	return -EOPNOTSUPP;
+#endif
+}
+
 static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
 {
 	struct bch_fs *c = trans->c;
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index c74af15b14f2..5cfba9e98966 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -292,7 +292,9 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i
 struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
 void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
 			 struct bch_inode_unpacked *);
-int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
+int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *);
+int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
+			    struct bch_inode_unpacked *, unsigned);
 
 #include "rebalance.h"
 
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 651da52b2cbc..e6be32003f3b 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -473,6 +473,12 @@ static int inode_opt_set_fn(struct btree_trans *trans,
 {
 	struct inode_opt_set *s = p;
 
+	if (s->id == Inode_opt_casefold) {
+		int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->v);
+		if (ret)
+			return ret;
+	}
+
 	if (s->defined)
 		bi->bi_fields_set |= 1U << s->id;
 	else
-- 
2.50.1


From ecd76c5f108eec3eea0a5b32007cda0dca6c92ac Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 21 May 2025 00:41:07 -0400
Subject: [PATCH 03/16] bcachefs: Fix bch2_dirent_create_snapshot() for
 casefolding

bch2_dirent_create_snapshot(), used in fsck, neglected to create a
casefolded dirent.

Just move this into dirent_create_key().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/dirent.c | 33 +++++++++++++++------------------
 fs/bcachefs/dirent.h |  2 +-
 fs/bcachefs/fsck.c   |  2 ++
 fs/bcachefs/namei.c  |  2 --
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 8a680e52c1ed..a51195088227 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -288,6 +288,7 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+				const struct bch_hash_info *hash_info,
 				subvol_inum dir,
 				u8 type,
 				const struct qstr *name,
@@ -295,10 +296,19 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 				u64 dst)
 {
 	struct bkey_i_dirent *dirent;
+	struct qstr _cf_name;
 
 	if (name->len > BCH_NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
 
+	if (hash_info->cf_encoding && !cf_name) {
+		int ret = bch2_casefold(trans, hash_info, name, &_cf_name);
+		if (ret)
+			return ERR_PTR(ret);
+
+		cf_name = &_cf_name;
+	}
+
 	dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
 	if (IS_ERR(dirent))
 		return dirent;
@@ -324,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
 	struct bkey_i_dirent *dirent;
 	int ret;
 
-	dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
+	dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum);
 	ret = PTR_ERR_OR_ZERO(dirent);
 	if (ret)
 		return ret;
@@ -333,8 +343,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
 	dirent->k.p.snapshot	= snapshot;
 
 	ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
-					dir_inum, snapshot, &dirent->k_i,
-					flags|BTREE_UPDATE_internal_snapshot_node);
+					dir_inum, snapshot, &dirent->k_i, flags);
 	*dir_offset = dirent->k.p.offset;
 
 	return ret;
@@ -344,28 +353,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
 		       const struct bch_hash_info *hash_info,
 		       u8 type, const struct qstr *name, u64 dst_inum,
 		       u64 *dir_offset,
-		       u64 *i_size,
 		       enum btree_iter_update_trigger_flags flags)
 {
 	struct bkey_i_dirent *dirent;
 	int ret;
 
-	if (hash_info->cf_encoding) {
-		struct qstr cf_name;
-		ret = bch2_casefold(trans, hash_info, name, &cf_name);
-		if (ret)
-			return ret;
-		dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
-	} else {
-		dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
-	}
-
+	dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum);
 	ret = PTR_ERR_OR_ZERO(dirent);
 	if (ret)
 		return ret;
 
-	*i_size += bkey_bytes(&dirent->k);
-
 	ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
 			    dir, &dirent->k_i, flags);
 	*dir_offset = dirent->k.p.offset;
@@ -466,7 +463,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
 		*src_offset = dst_iter.pos.offset;
 
 	/* Create new dst key: */
-	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
+	new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name,
 				    dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
 	ret = PTR_ERR_OR_ZERO(new_dst);
 	if (ret)
@@ -477,7 +474,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
 
 	/* Create new src key: */
 	if (mode == BCH_RENAME_EXCHANGE) {
-		new_src = dirent_create_key(trans, src_dir, 0, src_name,
+		new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name,
 					    src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
 		ret = PTR_ERR_OR_ZERO(new_src);
 		if (ret)
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 9838a7ba7ed1..d3e7ae669575 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -65,7 +65,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
 			enum btree_iter_update_trigger_flags);
 int bch2_dirent_create(struct btree_trans *, subvol_inum,
 		       const struct bch_hash_info *, u8,
-		       const struct qstr *, u64, u64 *, u64 *,
+		       const struct qstr *, u64, u64 *,
 		       enum btree_iter_update_trigger_flags);
 
 static inline unsigned vfs_d_type(unsigned type)
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 71d428f376a5..0cf9fd4c17bc 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -306,6 +306,7 @@ create_lostfound:
 				&lostfound_str,
 				lostfound->bi_inum,
 				&lostfound->bi_dir_offset,
+				BTREE_UPDATE_internal_snapshot_node|
 				STR_HASH_must_create) ?:
 		bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
 				       BTREE_UPDATE_internal_snapshot_node);
@@ -431,6 +432,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
 				&name,
 				inode->bi_subvol ?: inode->bi_inum,
 				&inode->bi_dir_offset,
+				BTREE_UPDATE_internal_snapshot_node|
 				STR_HASH_must_create);
 	if (ret) {
 		bch_err_msg(c, ret, "error creating dirent");
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 52c58c6d53d2..9136a9097789 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -158,7 +158,6 @@ int bch2_create_trans(struct btree_trans *trans,
 					   name,
 					   dir_target,
 					   &dir_offset,
-					   &dir_u->bi_size,
 					   STR_HASH_must_create|BTREE_ITER_with_updates) ?:
 			bch2_inode_write(trans, &dir_iter, dir_u);
 		if (ret)
@@ -225,7 +224,6 @@ int bch2_link_trans(struct btree_trans *trans,
 				 mode_to_type(inode_u->bi_mode),
 				 name, inum.inum,
 				 &dir_offset,
-				 &dir_u->bi_size,
 				 STR_HASH_must_create);
 	if (ret)
 		goto err;
-- 
2.50.1


From 010c89468134d1991b87122379f86feae23d512f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 21 May 2025 00:38:04 -0400
Subject: [PATCH 04/16] bcachefs: Check for casefolded dirents in non
 casefolded dirs

Check for mismatches between casefold dirents and casefold directories.

A mismatch will cause lookups to fail, as we'll be doing the lookup with
the casefolded name, which won't match the non-casefolded dirent, and
vice versa.

Reported-by: Christopher Snowhill <chris@kode54.net>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fsck.c             | 35 ++++++++++++++++++++++++++++++++++
 fs/bcachefs/sb-errors_format.h |  8 +++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 0cf9fd4c17bc..aaf187085276 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2190,6 +2190,41 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 
+	/* check casefold */
+	if (fsck_err_on(d.v->d_casefold != !!hash_info->cf_encoding,
+			trans, dirent_casefold_mismatch,
+			"dirent casefold does not match dir casefold\n%s",
+			(printbuf_reset(&buf),
+			 bch2_bkey_val_to_text(&buf, c, k),
+			 buf.buf))) {
+		struct qstr name = bch2_dirent_get_name(d);
+		u32 subvol = d.v->d_type == DT_SUBVOL
+			? d.v->d_parent_subvol
+			: 0;
+		u64 target = d.v->d_type == DT_SUBVOL
+			? d.v->d_child_subvol
+			: d.v->d_inum;
+		u64 dir_offset;
+
+		ret =   bch2_hash_delete_at(trans,
+					    bch2_dirent_hash_desc, hash_info, iter,
+					    BTREE_UPDATE_internal_snapshot_node) ?:
+			bch2_dirent_create_snapshot(trans, subvol,
+						    d.k->p.inode, d.k->p.snapshot,
+						    hash_info,
+						    d.v->d_type,
+						    &name,
+						    target,
+						    &dir_offset,
+						    BTREE_ITER_with_updates|
+						    BTREE_UPDATE_internal_snapshot_node|
+						    STR_HASH_must_create) ?:
+			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+
+		/* might need another check_dirents pass */
+		goto out;
+	}
+
 	if (d.v->d_type == DT_SUBVOL) {
 		ret = check_dirent_to_subvol(trans, iter, d);
 		if (ret)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 3b69a924086f..4036a20c6adc 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -209,6 +209,7 @@ enum bch_fsck_flags {
 	x(subvol_to_missing_root,				188,	0)		\
 	x(subvol_root_wrong_bi_subvol,				189,	FSCK_AUTOFIX)	\
 	x(bkey_in_missing_snapshot,				190,	0)		\
+	x(bkey_in_deleted_snapshot,				315,	0)		\
 	x(inode_pos_inode_nonzero,				191,	0)		\
 	x(inode_pos_blockdev_range,				192,	0)		\
 	x(inode_alloc_cursor_inode_bad,				301,	0)		\
@@ -216,6 +217,7 @@ enum bch_fsck_flags {
 	x(inode_str_hash_invalid,				194,	0)		\
 	x(inode_v3_fields_start_bad,				195,	0)		\
 	x(inode_snapshot_mismatch,				196,	0)		\
+	x(snapshot_key_missing_inode_snapshot,			314,	0)		\
 	x(inode_unlinked_but_clean,				197,	0)		\
 	x(inode_unlinked_but_nlink_nonzero,			198,	0)		\
 	x(inode_unlinked_and_not_open,				281,	0)		\
@@ -237,6 +239,8 @@ enum bch_fsck_flags {
 	x(inode_unreachable,					210,	FSCK_AUTOFIX)	\
 	x(inode_journal_seq_in_future,				299,	FSCK_AUTOFIX)	\
 	x(inode_i_sectors_underflow,				312,	FSCK_AUTOFIX)	\
+	x(inode_has_case_insensitive_not_set,			316,	FSCK_AUTOFIX)	\
+	x(inode_parent_has_case_insensitive_not_set,		317,	FSCK_AUTOFIX)	\
 	x(vfs_inode_i_blocks_underflow,				311,	FSCK_AUTOFIX)	\
 	x(vfs_inode_i_blocks_not_zero_at_truncate,		313,	FSCK_AUTOFIX)	\
 	x(deleted_inode_but_clean,				211,	FSCK_AUTOFIX)	\
@@ -262,6 +266,7 @@ enum bch_fsck_flags {
 	x(dirent_to_overwritten_inode,				302,	0)		\
 	x(dirent_to_missing_subvol,				230,	0)		\
 	x(dirent_to_itself,					231,	0)		\
+	x(dirent_casefold_mismatch,				318,	FSCK_AUTOFIX)	\
 	x(quota_type_invalid,					232,	0)		\
 	x(xattr_val_size_too_small,				233,	0)		\
 	x(xattr_val_size_too_big,				234,	0)		\
@@ -301,6 +306,7 @@ enum bch_fsck_flags {
 	x(btree_ptr_v2_written_0,				268,	0)		\
 	x(subvol_snapshot_bad,					269,	0)		\
 	x(subvol_inode_bad,					270,	0)		\
+	x(subvol_missing,					308,	FSCK_AUTOFIX)	\
 	x(alloc_key_stripe_sectors_wrong,			271,	FSCK_AUTOFIX)	\
 	x(accounting_mismatch,					272,	FSCK_AUTOFIX)	\
 	x(accounting_replicas_not_marked,			273,	0)		\
@@ -322,7 +328,7 @@ enum bch_fsck_flags {
 	x(dirent_stray_data_after_cf_name,			305,	0)		\
 	x(rebalance_work_incorrectly_set,			309,	FSCK_AUTOFIX)	\
 	x(rebalance_work_incorrectly_unset,			310,	FSCK_AUTOFIX)	\
-	x(MAX,							314,	0)
+	x(MAX,							319,	0)
 
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
-- 
2.50.1


From 3a08988123c868dbfdd054541b1090fb891fa49e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 21 May 2025 18:51:49 -0600
Subject: [PATCH 05/16] io_uring/net: only retry recv bundle for a full
 transfer

If a shorter than assumed transfer was seen, a partial buffer will have
been filled. For that case it isn't sane to attempt to fill more into
the bundle before posting a completion, as that will cause a gap in
the received data.

Check if the iterator has hit zero and only allow to continue a bundle
operation if that is the case.

Also ensure that for putting finished buffers, only the current transfer
is accounted. Otherwise too many buffers may be put for a short transfer.

Link: https://github.com/axboe/liburing/issues/1409
Cc: stable@vger.kernel.org
Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 24040bc3916a..27f37fa2ef79 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -827,18 +827,24 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
-		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
+		size_t this_ret = *ret - sr->done_io;
+
+		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
 				      issue_flags);
 		if (sr->retry)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
 			goto finish;
-		/* if more is available, retry and append to this one */
-		if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+		/*
+		 * If more is available AND it was a full transfer, retry and
+		 * append to this one
+		 */
+		if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
+		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
-			sr->done_io += *ret;
+			sr->done_io += this_ret;
 			sr->retry = true;
 			return false;
 		}
-- 
2.50.1


From 407e0efdf8baf1672876d5948b75049860a93e59 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 May 2025 12:40:30 +0000
Subject: [PATCH 06/16] idpf: fix idpf_vport_splitq_napi_poll()

idpf_vport_splitq_napi_poll() can incorrectly return @budget
after napi_complete_done() has been called.

This violates NAPI rules, because after napi_complete_done(),
current thread lost napi ownership.

Move the test against POLL_MODE before the napi_complete_done().

Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support")
Reported-by: Peter Newman <peternewman@google.com>
Closes: https://lore.kernel.org/netdev/20250520121908.1805732-1-edumazet@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Joshua Hay <joshua.a.hay@intel.com>
Cc: Alan Brady <alan.brady@intel.com>
Cc: Madhu Chittim <madhu.chittim@intel.com>
Cc: Phani Burra <phani.r.burra@intel.com>
Cc: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Link: https://patch.msgid.link/20250520124030.1983936-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/idpf/idpf_txrx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index bdf52cef3891..2d5f5c9f91ce 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
 		return budget;
 	}
 
+	/* Switch to poll mode in the tear-down path after sending disable
+	 * queues virtchnl message, as the interrupts will be disabled after
+	 * that.
+	 */
+	if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
+							 q_vector->tx[0])))
+		return budget;
+
 	work_done = min_t(int, work_done, budget - 1);
 
 	/* Exit the polling mode, but don't re-enable interrupts if stack might
@@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
 	else
 		idpf_vport_intr_set_wb_on_itr(q_vector);
 
-	/* Switch to poll mode in the tear-down path after sending disable
-	 * queues virtchnl message, as the interrupts will be disabled after
-	 * that
-	 */
-	if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
-							 q_vector->tx[0])))
-		return budget;
-	else
-		return work_done;
+	return work_done;
 }
 
 /**
-- 
2.50.1


From 3f981138109f63232a5fb7165938d4c945cc1b9d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sun, 18 May 2025 15:20:37 -0700
Subject: [PATCH 07/16] sch_hfsc: Fix qlen accounting bug when using peek in
 hfsc_enqueue()

When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the
child qdisc's peek() operation before incrementing sch->q.qlen and
sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may
trigger an immediate dequeue and potential packet drop. In such cases,
qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog
have not yet been updated, leading to inconsistent queue accounting. This
can leave an empty HFSC class in the active list, causing further
consequences like use-after-free.

This patch fixes the bug by moving the increment of sch->q.qlen and
sch->qstats.backlog before the call to the child qdisc's peek() operation.
This ensures that queue length and backlog are always accurate when packet
drops or dequeues are triggered during the peek.

Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline")
Reported-by: Mingi Cho <mincho@theori.io>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/sch_hfsc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index cb8c525ea20e..7986145a527c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1569,6 +1569,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 		return err;
 	}
 
+	sch->qstats.backlog += len;
+	sch->q.qlen++;
+
 	if (first && !cl->cl_nactive) {
 		if (cl->cl_flags & HFSC_RSC)
 			init_ed(cl, len);
@@ -1584,9 +1587,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 
 	}
 
-	sch->qstats.backlog += len;
-	sch->q.qlen++;
-
 	return NET_XMIT_SUCCESS;
 }
 
-- 
2.50.1


From c3572acffb751af65505511b847f628c872ffae0 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sun, 18 May 2025 15:20:38 -0700
Subject: [PATCH 08/16] selftests/tc-testing: Add an HFSC qlen accounting test

This test reproduces a scenario where HFSC queue length and backlog accounting
can become inconsistent when a peek operation triggers a dequeue and possible
drop before the parent qdisc updates its counters. The test sets up a DRR root
qdisc with an HFSC class, netem, and blackhole children, and uses Scapy to
inject a packet. It helps to verify that HFSC correctly tracks qlen and backlog
even when packets are dropped during peek-induced dequeue.

Cc: Mingi Cho <mincho@theori.io>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250518222038.58538-3-xiyou.wangcong@gmail.com
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../tc-testing/tc-tests/infra/qdiscs.json     | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index a951c0d33cd2..ddc97ecd8b39 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -573,5 +573,32 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 handle 1: root"
         ]
+    },
+    {
+        "id": "831d",
+        "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain",
+        "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"],
+        "plugins": { "requires": ["nsPlugin", "scapyPlugin"] },
+        "setup": [
+            "$IP link set dev $DEV1 up || true",
+            "$TC qdisc add dev $DEV1 root handle 1: drr",
+            "$TC filter add dev $DEV1 parent 1: basic classid 1:1",
+            "$TC class add dev $DEV1 parent 1: classid 1:1 drr",
+            "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1",
+            "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0",
+            "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem",
+            "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole"
+        ],
+        "scapy": {
+            "iface": "$DEV0",
+            "count": 5,
+            "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()"
+        },
+        "cmdUnderTest": "$TC -s qdisc show dev $DEV1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DEV1",
+        "matchPattern": "qdisc hfsc",
+        "matchCount": "1",
+        "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"]
     }
 ]
-- 
2.50.1


From 184fb40f731bd3353b0887731f7caba66609e9cd Mon Sep 17 00:00:00 2001
From: Suman Ghosh <sumang@marvell.com>
Date: Mon, 19 May 2025 12:56:58 +0530
Subject: [PATCH 09/16] octeontx2-pf: Avoid adding dcbnl_ops for LBK and SDP vf

Priority flow control is not supported for LBK and SDP vf. This patch
adds support to not add dcbnl_ops for LBK and SDP vf.

Fixes: 8e67558177f8 ("octeontx2-pf: PFC config support with DCBx")
Signed-off-by: Suman Ghosh <sumang@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250519072658.2960851-1-sumang@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 7ef3ba477d49..9b28be4c4a5d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 #ifdef CONFIG_DCB
-	err = otx2_dcbnl_set_ops(netdev);
-	if (err)
-		goto err_free_zc_bmap;
+	/* Priority flow control is not supported for LBK and SDP vf(s) */
+	if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) {
+		err = otx2_dcbnl_set_ops(netdev);
+		if (err)
+			goto err_free_zc_bmap;
+	}
 #endif
 	otx2_qos_init(vf, qos_txqs);
 
-- 
2.50.1


From e279024617134c94fd3e37470156534d5f2b3472 Mon Sep 17 00:00:00 2001
From: Wang Liang <wangliang74@huawei.com>
Date: Tue, 20 May 2025 18:14:04 +0800
Subject: [PATCH 10/16] net/tipc: fix slab-use-after-free Read in
 tipc_aead_encrypt_done

Syzbot reported a slab-use-after-free with the following call trace:

  ==================================================================
  BUG: KASAN: slab-use-after-free in tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840
  Read of size 8 at addr ffff88807a733000 by task kworker/1:0/25

  Call Trace:
   kasan_report+0xd9/0x110 mm/kasan/report.c:601
   tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840
   crypto_request_complete include/crypto/algapi.h:266
   aead_request_complete include/crypto/internal/aead.h:85
   cryptd_aead_crypt+0x3b8/0x750 crypto/cryptd.c:772
   crypto_request_complete include/crypto/algapi.h:266
   cryptd_queue_worker+0x131/0x200 crypto/cryptd.c:181
   process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231

  Allocated by task 8355:
   kzalloc_noprof include/linux/slab.h:778
   tipc_crypto_start+0xcc/0x9e0 net/tipc/crypto.c:1466
   tipc_init_net+0x2dd/0x430 net/tipc/core.c:72
   ops_init+0xb9/0x650 net/core/net_namespace.c:139
   setup_net+0x435/0xb40 net/core/net_namespace.c:343
   copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508
   create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110
   unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228
   ksys_unshare+0x419/0x970 kernel/fork.c:3323
   __do_sys_unshare kernel/fork.c:3394

  Freed by task 63:
   kfree+0x12a/0x3b0 mm/slub.c:4557
   tipc_crypto_stop+0x23c/0x500 net/tipc/crypto.c:1539
   tipc_exit_net+0x8c/0x110 net/tipc/core.c:119
   ops_exit_list+0xb0/0x180 net/core/net_namespace.c:173
   cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640
   process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231

After freed the tipc_crypto tx by delete namespace, tipc_aead_encrypt_done
may still visit it in cryptd_queue_worker workqueue.

I reproduce this issue by:
  ip netns add ns1
  ip link add veth1 type veth peer name veth2
  ip link set veth1 netns ns1
  ip netns exec ns1 tipc bearer enable media eth dev veth1
  ip netns exec ns1 tipc node set key this_is_a_master_key master
  ip netns exec ns1 tipc bearer disable media eth dev veth1
  ip netns del ns1

The key of reproduction is that, simd_aead_encrypt is interrupted, leading
to crypto_simd_usable() return false. Thus, the cryptd_queue_worker is
triggered, and the tipc_crypto tx will be visited.

  tipc_disc_timeout
    tipc_bearer_xmit_skb
      tipc_crypto_xmit
        tipc_aead_encrypt
          crypto_aead_encrypt
            // encrypt()
            simd_aead_encrypt
              // crypto_simd_usable() is false
              child = &ctx->cryptd_tfm->base;

  simd_aead_encrypt
    crypto_aead_encrypt
      // encrypt()
      cryptd_aead_encrypt_enqueue
        cryptd_aead_enqueue
          cryptd_enqueue_request
            // trigger cryptd_queue_worker
            queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work)

Fix this by holding net reference count before encrypt.

Reported-by: syzbot+55c12726619ff85ce1f6@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=55c12726619ff85ce1f6
Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
Signed-off-by: Wang Liang <wangliang74@huawei.com>
Link: https://patch.msgid.link/20250520101404.1341730-1-wangliang74@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/tipc/crypto.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c524421ec652..8584893b4785 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
 		goto exit;
 	}
 
+	/* Get net to avoid freed tipc_crypto when delete namespace */
+	get_net(aead->crypto->net);
+
 	/* Now, do encrypt */
 	rc = crypto_aead_encrypt(req);
 	if (rc == -EINPROGRESS || rc == -EBUSY)
 		return rc;
 
 	tipc_bearer_put(b);
+	put_net(aead->crypto->net);
 
 exit:
 	kfree(ctx);
@@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(void *data, int err)
 	kfree(tx_ctx);
 	tipc_bearer_put(b);
 	tipc_aead_put(aead);
+	put_net(net);
 }
 
 /**
-- 
2.50.1


From 0eefa27b493306928d88af6368193b134c98fd64 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Wed, 21 May 2025 11:38:33 +0530
Subject: [PATCH 11/16] octeontx2-af: Set LMT_ENA bit for APR table entries

This patch enables the LMT line for a PF/VF by setting the
LMT_ENA bit in the APR_LMT_MAP_ENTRY_S structure.

Additionally, it simplifies the logic for calculating the
LMTST table index by consistently using the maximum
number of hw supported VFs (i.e., 256).

Fixes: 873a1e3d207a ("octeontx2-af: cn10k: Setting up lmtst map table").
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Link: https://patch.msgid.link/20250521060834.19780-2-gakula@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
index 7fa98aeb3663..3838c04b78c2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
@@ -15,13 +15,17 @@
 #define LMT_TBL_OP_WRITE	1
 #define LMT_MAP_TABLE_SIZE	(128 * 1024)
 #define LMT_MAPTBL_ENTRY_SIZE	16
+#define LMT_MAX_VFS		256
+
+#define LMT_MAP_ENTRY_ENA      BIT_ULL(20)
+#define LMT_MAP_ENTRY_LINES    GENMASK_ULL(18, 16)
 
 /* Function to perform operations (read/write) on lmtst map table */
 static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
 			       int lmt_tbl_op)
 {
 	void __iomem *lmt_map_base;
-	u64 tbl_base;
+	u64 tbl_base, cfg;
 
 	tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
 
@@ -35,6 +39,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
 		*val = readq(lmt_map_base + index);
 	} else {
 		writeq((*val), (lmt_map_base + index));
+
+		cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1);
+		/* 2048 LMTLINES */
+		cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6);
+
+		writeq(cfg, (lmt_map_base + (index + 8)));
+
 		/* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S
 		 * changes effective. Write 1 for flush and read is being used as a
 		 * barrier and sets up a data dependency. Write to 0 after a write
@@ -52,7 +63,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
 #define LMT_MAP_TBL_W1_OFF  8
 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc)
 {
-	return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) +
+	return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) +
 		(pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE;
 }
 
-- 
2.50.1


From a6ae7129819ad20788e610261246e71736543b8b Mon Sep 17 00:00:00 2001
From: Geetha sowjanya <gakula@marvell.com>
Date: Wed, 21 May 2025 11:38:34 +0530
Subject: [PATCH 12/16] octeontx2-af: Fix APR entry mapping based on
 APR_LMT_CFG

The current implementation maps the APR table using a fixed size,
which can lead to incorrect mapping when the number of PFs and VFs
varies.
This patch corrects the mapping by calculating the APR table
size dynamically based on the values configured in the
APR_LMT_CFG register, ensuring accurate representation
of APR entries in debugfs.

Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table").
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Link: https://patch.msgid.link/20250521060834.19780-3-gakula@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c |  9 ++++++---
 .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c   | 11 ++++++++---
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
index 3838c04b78c2..4a3370a40dd8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
@@ -13,7 +13,6 @@
 /* RVU LMTST */
 #define LMT_TBL_OP_READ		0
 #define LMT_TBL_OP_WRITE	1
-#define LMT_MAP_TABLE_SIZE	(128 * 1024)
 #define LMT_MAPTBL_ENTRY_SIZE	16
 #define LMT_MAX_VFS		256
 
@@ -26,10 +25,14 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
 {
 	void __iomem *lmt_map_base;
 	u64 tbl_base, cfg;
+	int pfs, vfs;
 
 	tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+	cfg  = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
+	vfs = 1 << (cfg & 0xF);
+	pfs = 1 << ((cfg >> 4) & 0x7);
 
-	lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE);
+	lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE);
 	if (!lmt_map_base) {
 		dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
 		return -ENOMEM;
@@ -80,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
 
 	mutex_lock(&rvu->rsrc_lock);
 	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova);
-	pf = rvu_get_pf(pcifunc) & 0x1F;
+	pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK;
 	val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 |
 	      ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF);
 	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index a1f9ec03c2ce..c827da626471 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
 	u64 lmt_addr, val, tbl_base;
 	int pf, vf, num_vfs, hw_vfs;
 	void __iomem *lmt_map_base;
+	int apr_pfs, apr_vfs;
 	int buf_size = 10240;
 	size_t off = 0;
 	int index = 0;
@@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
 		return -ENOMEM;
 
 	tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+	val  = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
+	apr_vfs = 1 << (val & 0xF);
+	apr_pfs = 1 << ((val >> 4) & 0x7);
 
-	lmt_map_base = ioremap_wc(tbl_base, 128 * 1024);
+	lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs *
+				  LMT_MAPTBL_ENTRY_SIZE);
 	if (!lmt_map_base) {
 		dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
 		kfree(buf);
@@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
 		off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d  \t\t\t",
 				    pf);
 
-		index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE;
+		index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE;
 		off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t",
 				 (tbl_base + index));
 		lmt_addr = readq(lmt_map_base + index);
@@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
 		/* Reading num of VFs per PF */
 		rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs);
 		for (vf = 0; vf < num_vfs; vf++) {
-			index = (pf * rvu->hw->total_vfs * 16) +
+			index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) +
 				((vf + 1)  * LMT_MAPTBL_ENTRY_SIZE);
 			off += scnprintf(&buf[off], buf_size - 1 - off,
 					    "PF%d:VF%d  \t\t", pf, vf);
-- 
2.50.1


From f2eae58c4428bd792c8e91e3666ab0718d87b44a Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@intel.com>
Date: Tue, 20 May 2025 03:45:55 -0700
Subject: [PATCH 13/16] platform/x86/intel/pmc: Fix Arrow Lake U/H NPU PCI ID
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The ARL requires that the GMA and NPU devices both be in D3Hot in order
for PC10 and S0iX to be achieved in S2idle. The original ARL-H/U addition
to the intel_pmc_core driver attempted to do this by switching them to D3
in the init and resume calls of the intel_pmc_core driver.

The problem is the ARL-H/U have a different NPU device and thus are not
being properly set and thus S0iX does not work properly in ARL-H/U. This
patch creates a new ARL-H specific device id that is correct and also
adds the D3 fixup to the suspend callback. This way if the PCI devies
drop from D3 to D0 after resume they can be corrected for the next
suspend. Thus there is no dropout in S0iX.

Fixes: bd820906ea9d ("platform/x86/intel/pmc: Add Arrow Lake U/H support to intel_pmc_core driver")
Signed-off-by: Todd Brandt <todd.e.brandt@intel.com>
Link: https://lore.kernel.org/r/a61f78be45c13f39e122dcc684b636f4b21e79a0.1747737446.git.todd.e.brandt@intel.com
Reviewed-by: Ilpo JÃ¤rvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo JÃ¤rvinen <ilpo.jarvinen@linux.intel.com>
---
 drivers/platform/x86/intel/pmc/arl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel/pmc/arl.c b/drivers/platform/x86/intel/pmc/arl.c
index 320993bd6d31..f9c48738b853 100644
--- a/drivers/platform/x86/intel/pmc/arl.c
+++ b/drivers/platform/x86/intel/pmc/arl.c
@@ -681,6 +681,7 @@ static struct pmc_info arl_pmc_info_list[] = {
 
 #define ARL_NPU_PCI_DEV			0xad1d
 #define ARL_GNA_PCI_DEV			0xae4c
+#define ARL_H_NPU_PCI_DEV		0x7d1d
 #define ARL_H_GNA_PCI_DEV		0x774c
 /*
  * Set power state of select devices that do not have drivers to D3
@@ -694,7 +695,7 @@ static void arl_d3_fixup(void)
 
 static void arl_h_d3_fixup(void)
 {
-	pmc_core_set_device_d3(ARL_NPU_PCI_DEV);
+	pmc_core_set_device_d3(ARL_H_NPU_PCI_DEV);
 	pmc_core_set_device_d3(ARL_H_GNA_PCI_DEV);
 }
 
-- 
2.50.1


From 8dc9c078ab1870dff8613a5688de589c4175b679 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 30 Apr 2025 09:34:35 +0100
Subject: [PATCH 14/16] tpm: remove kmalloc failure error message

The kmalloc failure message is just noise. Remove it and replace -EFAULT
with -ENOMEM as standard for out of memory allocation error returns.

Link: https://lore.kernel.org/linux-integrity/20250430083435.860146-1-colin.i.king@gmail.com/
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/eventlog/tpm1.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/char/tpm/eventlog/tpm1.c b/drivers/char/tpm/eventlog/tpm1.c
index 12ee42a31c71..e7913b2853d5 100644
--- a/drivers/char/tpm/eventlog/tpm1.c
+++ b/drivers/char/tpm/eventlog/tpm1.c
@@ -257,11 +257,8 @@ static int tpm1_ascii_bios_measurements_show(struct seq_file *m, void *v)
 	    (unsigned char *)(v + sizeof(struct tcpa_event));
 
 	eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
-	if (!eventname) {
-		printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
-		       __func__);
-		return -EFAULT;
-	}
+	if (!eventname)
+		return -ENOMEM;
 
 	/* 1st: PCR */
 	seq_printf(m, "%2d ", do_endian_conversion(event->pcr_index));
-- 
2.50.1


From a85b55ee64a5da58c6e2c69e2648023189210eae Mon Sep 17 00:00:00 2001
From: Yeoreum Yun <yeoreum.yun@arm.com>
Date: Tue, 15 Apr 2025 19:50:12 +0100
Subject: [PATCH 15/16] tpm_ffa_crb: access tpm service over FF-A direct
 message request v2

For secure partition with multi service, tpm_ffa_crb can access tpm
service with direct message request v2 interface according to chapter 3.3,
TPM Service Command Response Buffer Interface Over FF-A specificationi v1.0 BET.

This patch reflects this spec to access tpm service over
FF-A direct message request v2 ABI.

Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_crb_ffa.c | 55 ++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/char/tpm/tpm_crb_ffa.c b/drivers/char/tpm/tpm_crb_ffa.c
index 3169a87a56b6..fed775cf53ab 100644
--- a/drivers/char/tpm/tpm_crb_ffa.c
+++ b/drivers/char/tpm/tpm_crb_ffa.c
@@ -105,7 +105,10 @@ struct tpm_crb_ffa {
 	u16 minor_version;
 	/* lock to protect sending of FF-A messages: */
 	struct mutex msg_data_lock;
-	struct ffa_send_direct_data direct_msg_data;
+	union {
+		struct ffa_send_direct_data direct_msg_data;
+		struct ffa_send_direct_data2 direct_msg_data2;
+	};
 };
 
 static struct tpm_crb_ffa *tpm_crb_ffa;
@@ -185,18 +188,34 @@ static int __tpm_crb_ffa_send_recieve(unsigned long func_id,
 
 	msg_ops = tpm_crb_ffa->ffa_dev->ops->msg_ops;
 
-	memset(&tpm_crb_ffa->direct_msg_data, 0x00,
-	       sizeof(struct ffa_send_direct_data));
-
-	tpm_crb_ffa->direct_msg_data.data1 = func_id;
-	tpm_crb_ffa->direct_msg_data.data2 = a0;
-	tpm_crb_ffa->direct_msg_data.data3 = a1;
-	tpm_crb_ffa->direct_msg_data.data4 = a2;
+	if (ffa_partition_supports_direct_req2_recv(tpm_crb_ffa->ffa_dev)) {
+		memset(&tpm_crb_ffa->direct_msg_data2, 0x00,
+		       sizeof(struct ffa_send_direct_data2));
+
+		tpm_crb_ffa->direct_msg_data2.data[0] = func_id;
+		tpm_crb_ffa->direct_msg_data2.data[1] = a0;
+		tpm_crb_ffa->direct_msg_data2.data[2] = a1;
+		tpm_crb_ffa->direct_msg_data2.data[3] = a2;
+
+		ret = msg_ops->sync_send_receive2(tpm_crb_ffa->ffa_dev,
+				&tpm_crb_ffa->direct_msg_data2);
+		if (!ret)
+			ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data2.data[0]);
+	} else {
+		memset(&tpm_crb_ffa->direct_msg_data, 0x00,
+		       sizeof(struct ffa_send_direct_data));
+
+		tpm_crb_ffa->direct_msg_data.data1 = func_id;
+		tpm_crb_ffa->direct_msg_data.data2 = a0;
+		tpm_crb_ffa->direct_msg_data.data3 = a1;
+		tpm_crb_ffa->direct_msg_data.data4 = a2;
+
+		ret = msg_ops->sync_send_receive(tpm_crb_ffa->ffa_dev,
+				&tpm_crb_ffa->direct_msg_data);
+		if (!ret)
+			ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data.data1);
+	}
 
-	ret = msg_ops->sync_send_receive(tpm_crb_ffa->ffa_dev,
-			&tpm_crb_ffa->direct_msg_data);
-	if (!ret)
-		ret = tpm_crb_ffa_to_linux_errno(tpm_crb_ffa->direct_msg_data.data1);
 
 	return ret;
 }
@@ -231,8 +250,13 @@ int tpm_crb_ffa_get_interface_version(u16 *major, u16 *minor)
 
 	rc = __tpm_crb_ffa_send_recieve(CRB_FFA_GET_INTERFACE_VERSION, 0x00, 0x00, 0x00);
 	if (!rc) {
-		*major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
-		*minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+		if (ffa_partition_supports_direct_req2_recv(tpm_crb_ffa->ffa_dev)) {
+			*major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data2.data[1]);
+			*minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data2.data[1]);
+		} else {
+			*major = CRB_FFA_MAJOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+			*minor = CRB_FFA_MINOR_VERSION(tpm_crb_ffa->direct_msg_data.data2);
+		}
 	}
 
 	return rc;
@@ -277,7 +301,8 @@ static int tpm_crb_ffa_probe(struct ffa_device *ffa_dev)
 
 	tpm_crb_ffa = ERR_PTR(-ENODEV); // set tpm_crb_ffa so we can detect probe failure
 
-	if (!ffa_partition_supports_direct_recv(ffa_dev)) {
+	if (!ffa_partition_supports_direct_recv(ffa_dev) &&
+	    !ffa_partition_supports_direct_req2_recv(ffa_dev)) {
 		pr_err("TPM partition doesn't support direct message receive.\n");
 		return -EINVAL;
 	}
-- 
2.50.1


From 8d67b79bcc96f85cbd8c16a64e6e8899d5be522c Mon Sep 17 00:00:00 2001
From: Yeoreum Yun <yeoreum.yun@arm.com>
Date: Tue, 15 Apr 2025 19:50:13 +0100
Subject: [PATCH 16/16] tpm_crb_ffa: use dev_xx() macro to print log

Instead of pr_xxx() macro, use dev_xxx() to print log.
This patch changes some error log level to warn log level when
the tpm_crb_ffa secure partition doesn't support properly but
system can run without it.
(i.e) unsupport of direct message ABI or unsupported ABI version

Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_crb_ffa.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/tpm_crb_ffa.c b/drivers/char/tpm/tpm_crb_ffa.c
index fed775cf53ab..66b38b2cbdd5 100644
--- a/drivers/char/tpm/tpm_crb_ffa.c
+++ b/drivers/char/tpm/tpm_crb_ffa.c
@@ -303,7 +303,7 @@ static int tpm_crb_ffa_probe(struct ffa_device *ffa_dev)
 
 	if (!ffa_partition_supports_direct_recv(ffa_dev) &&
 	    !ffa_partition_supports_direct_req2_recv(ffa_dev)) {
-		pr_err("TPM partition doesn't support direct message receive.\n");
+		dev_warn(&ffa_dev->dev, "partition doesn't support direct message receive.\n");
 		return -EINVAL;
 	}
 
@@ -324,17 +324,17 @@ static int tpm_crb_ffa_probe(struct ffa_device *ffa_dev)
 	rc = tpm_crb_ffa_get_interface_version(&tpm_crb_ffa->major_version,
 					       &tpm_crb_ffa->minor_version);
 	if (rc) {
-		pr_err("failed to get crb interface version. rc:%d", rc);
+		dev_err(&ffa_dev->dev, "failed to get crb interface version. rc:%d\n", rc);
 		goto out;
 	}
 
-	pr_info("ABI version %u.%u", tpm_crb_ffa->major_version,
+	dev_info(&ffa_dev->dev, "ABI version %u.%u\n", tpm_crb_ffa->major_version,
 		tpm_crb_ffa->minor_version);
 
 	if (tpm_crb_ffa->major_version != CRB_FFA_VERSION_MAJOR ||
 	    (tpm_crb_ffa->minor_version > 0 &&
 	    tpm_crb_ffa->minor_version < CRB_FFA_VERSION_MINOR)) {
-		pr_err("Incompatible ABI version");
+		dev_warn(&ffa_dev->dev, "Incompatible ABI version\n");
 		goto out;
 	}
 
-- 
2.50.1