From 6f110a5e4f9977c31ce76fefbfef6fd4eab6bfb7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 10:00:04 -0700 Subject: [PATCH 01/16] Disable SLUB_TINY for build testing ... and don't error out so hard on missing module descriptions. Before commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") we used to warn about missing module descriptions, but only when building with extra warnigns (ie 'W=1'). After that commit the warning became an unconditional hard error. And it turns out not all modules have been converted despite the claims to the contrary. As reported by Damian Tometzki, the slub KUnit test didn't have a module description, and apparently nobody ever really noticed. The reason nobody noticed seems to be that the slub KUnit tests get disabled by SLUB_TINY, which also ends up disabling a lot of other code, both in tests and in slub itself. And so anybody doing full build tests didn't actually see this failre. So let's disable SLUB_TINY for build-only tests, since it clearly ends up limiting build coverage. Also turn the missing module descriptions error back into a warning, but let's keep it around for non-'W=1' builds. Reported-by: Damian Tometzki Link: https://lore.kernel.org/all/01070196099fd059-e8463438-7b1b-4ec8-816d-173874be9966-000000@eu-central-1.amazonses.com/ Cc: Masahiro Yamada Cc: Jeff Johnson Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- scripts/mod/modpost.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index d3fb3762887b..e113f713b493 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -201,7 +201,7 @@ config KVFREE_RCU_BATCHED config SLUB_TINY bool "Configure for minimal memory footprint" - depends on EXPERT + depends on EXPERT && !COMPILE_TEST select SLAB_MERGE_DEFAULT help Configures the slab allocator in a way to achieve minimal memory diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 92627e8d0e16..be89921d60b6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1603,7 +1603,7 @@ static void read_symbols(const char *modname) } if (!get_modinfo(&info, "description")) - error("missing MODULE_DESCRIPTION() in %s\n", modname); + warn("missing MODULE_DESCRIPTION() in %s\n", modname); } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { -- 2.51.0 From ec4acd3166d8a7a03b059d01b9c6f11a658e833f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:29:57 -0400 Subject: [PATCH 02/16] tools/power turbostat: disable "cpuidle" invocation counters, by default Create "pct_idle" counter group, the sofware notion of residency so it can now be singled out, independent of other counter groups. Create "cpuidle" group, the cpuidle invocation counts. Disable "cpuidle", by default. Create "swidle" = "cpuidle" + "pct_idle". Undocument "sysfs", the old name for "swidle", but keep it working for backwards compatibilty. Create "hwidle", all the HW idle counters Modify "idle", enabled by default "idle" = "hwidle" + "pct_idle" (and now excludes "cpuidle") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 12 +++++----- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e86493880c16..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,15 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index df0391bedcde..ab184f95cdaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) +#define BIC_pct_idle (1ULL << 63) #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -2362,6 +2366,15 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { retval |= BIC_GROUP_IDLE; break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; + break; } else if (!strcmp(name_list, "frequency")) { retval |= BIC_GROUP_FREQUENCY; break; @@ -2372,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10269,6 +10283,9 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10291,7 +10308,7 @@ void probe_cpuidle_residency(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) @@ -10315,6 +10332,9 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_cpuidle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10327,7 +10347,7 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) -- 2.51.0 From 03e00e373cab981ad808271b2650700cfa0fbda6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:49:20 -0400 Subject: [PATCH 03/16] tools/power turbostat: v2025.05.06 Support up to 8192 processors Add cpuidle governor debug telemetry, disabled by default Update default output to exclude cpuidle invocation counts Bug fixes Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab184f95cdaf..1b9fdc1a7ee8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9594,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From 0efdedb3358aa78102967f242379686f94315830 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 2 Apr 2025 21:21:57 +0100 Subject: [PATCH 04/16] tools/include: make uapi/linux/types.h usable from assembly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The "real" linux/types.h UAPI header gracefully degrades to a NOOP when included from assembly code. Mirror this behaviour in the tools/ variant. Test for __ASSEMBLER__ over __ASSEMBLY__ as the former is provided by the toolchain automatically. Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/af553c62-ca2f-4956-932c-dd6e3a126f58@sirena.org.uk/ Fixes: c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20250321-uapi-consistency-v1-1-439070118dc0@linutronix.de Signed-off-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Linus Torvalds --- tools/include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ -- 2.51.0 From 0af2f6be1b4281385b618cb86ad946eded089ac8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 13:11:33 -0700 Subject: [PATCH 05/16] Linux 6.15-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e55726a71d95..38689a0c3605 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From ad2698efce37e910dcf3c3914263e6cb3e86f8cd Mon Sep 17 00:00:00 2001 From: Nas Chung Date: Thu, 25 Jul 2024 15:10:32 +0900 Subject: [PATCH 06/16] media: uapi: v4l: Change V4L2_TYPE_IS_CAPTURE condition Explicitly compare a buffer type only with valid buffer types, to avoid matching a buffer type outside of the valid buffer type set. Signed-off-by: Nas Chung Reviewed-by: Michael Tretter Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c8cb2796130f..ccd6ad53432e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -153,10 +153,18 @@ enum v4l2_buf_type { V4L2_BUF_TYPE_SDR_OUTPUT = 12, V4L2_BUF_TYPE_META_CAPTURE = 13, V4L2_BUF_TYPE_META_OUTPUT = 14, + /* + * Note: V4L2_TYPE_IS_VALID and V4L2_TYPE_IS_OUTPUT must + * be updated if a new type is added. + */ /* Deprecated, do not use */ V4L2_BUF_TYPE_PRIVATE = 0x80, }; +#define V4L2_TYPE_IS_VALID(type) \ + ((type) >= V4L2_BUF_TYPE_VIDEO_CAPTURE &&\ + (type) <= V4L2_BUF_TYPE_META_OUTPUT) + #define V4L2_TYPE_IS_MULTIPLANAR(type) \ ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) @@ -171,7 +179,8 @@ enum v4l2_buf_type { || (type) == V4L2_BUF_TYPE_SDR_OUTPUT \ || (type) == V4L2_BUF_TYPE_META_OUTPUT) -#define V4L2_TYPE_IS_CAPTURE(type) (!V4L2_TYPE_IS_OUTPUT(type)) +#define V4L2_TYPE_IS_CAPTURE(type) \ + (V4L2_TYPE_IS_VALID(type) && !V4L2_TYPE_IS_OUTPUT(type)) enum v4l2_tuner_type { V4L2_TUNER_RADIO = 1, -- 2.51.0 From 8e172e38a623ce284baf2514f963b29e4d47c62e Mon Sep 17 00:00:00 2001 From: Nas Chung Date: Thu, 25 Jul 2024 15:10:33 +0900 Subject: [PATCH 07/16] media: qcom: venus: Fix uninitialized variable warning Avoid uninitialized variable when both V4L2_TYPE_IS_OUTPUT() and V4L2_TYPE_IS_CAPTURE() return false. Signed-off-by: Nas Chung Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/venus/vdec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c index 9f82882b77bc..39d0556d7237 100644 --- a/drivers/media/platform/qcom/venus/vdec.c +++ b/drivers/media/platform/qcom/venus/vdec.c @@ -154,14 +154,14 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type) return NULL; for (i = 0; i < size; i++) { - bool valid; + bool valid = false; if (fmt[i].type != type) continue; if (V4L2_TYPE_IS_OUTPUT(type)) { valid = venus_helper_check_codec(inst, fmt[i].pixfmt); - } else if (V4L2_TYPE_IS_CAPTURE(type)) { + } else { valid = venus_helper_check_format(inst, fmt[i].pixfmt); if (fmt[i].pixfmt == V4L2_PIX_FMT_QC10C && -- 2.51.0 From f81f69a0e3da141bdd73a16b8676f4e542533d87 Mon Sep 17 00:00:00 2001 From: Nas Chung Date: Thu, 25 Jul 2024 15:10:34 +0900 Subject: [PATCH 08/16] media: uapi: v4l: Fix V4L2_TYPE_IS_OUTPUT condition V4L2_TYPE_IS_OUTPUT() returns true for V4L2_BUF_TYPE_VIDEO_OVERLAY which definitely belongs to CAPTURE. Signed-off-by: Nas Chung Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ccd6ad53432e..af86ece741e9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -172,7 +172,6 @@ enum v4l2_buf_type { #define V4L2_TYPE_IS_OUTPUT(type) \ ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE \ - || (type) == V4L2_BUF_TYPE_VIDEO_OVERLAY \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY \ || (type) == V4L2_BUF_TYPE_VBI_OUTPUT \ || (type) == V4L2_BUF_TYPE_SLICED_VBI_OUTPUT \ -- 2.51.0 From b312ac33a593b64f23b0ddba59c429e89c479a54 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Tue, 1 Apr 2025 17:06:56 +0800 Subject: [PATCH 09/16] media: amphion: Reduce decoding latency for HEVC decoder The amphion decoder firmware supports a low latency flush mode for the HEVC format since v1.9.0. This feature, which is enabled when the display delay is set to 0, can help to reduce the decoding latency by appending some padding data to every frame. Signed-off-by: Ming Qian Reviewed-by: Nicolas Dufresne Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- drivers/media/platform/amphion/vpu_malone.c | 25 ++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/amphion/vpu_malone.c b/drivers/media/platform/amphion/vpu_malone.c index 4769c053c6c2..e07de425ab16 100644 --- a/drivers/media/platform/amphion/vpu_malone.c +++ b/drivers/media/platform/amphion/vpu_malone.c @@ -3,6 +3,7 @@ * Copyright 2020-2021 NXP */ +#include #include #include #include @@ -68,6 +69,12 @@ #define MALONE_DEC_FMT_RV_MASK BIT(21) +#define MALONE_VERSION_MASK 0xFFFFF +#define MALONE_VERSION(maj, min, inc) \ + (FIELD_PREP(0xF0000, maj) | FIELD_PREP(0xFF00, min) | FIELD_PREP(0xFF, inc)) +#define CHECK_VERSION(iface, maj, min) \ + (FIELD_GET(MALONE_VERSION_MASK, (iface)->fw_version) >= MALONE_VERSION(maj, min, 0)) + enum vpu_malone_stream_input_mode { INVALID_MODE = 0, FRAME_LVL, @@ -332,6 +339,8 @@ struct vpu_dec_ctrl { u32 buf_addr[VID_API_NUM_STREAMS]; }; +static const struct malone_padding_scode *get_padding_scode(u32 type, u32 fmt); + u32 vpu_malone_get_data_size(void) { return sizeof(struct vpu_dec_ctrl); @@ -654,9 +663,15 @@ static int vpu_malone_set_params(struct vpu_shared_addr *shared, hc->jpg[instance].jpg_mjpeg_interlaced = 0; } - hc->codec_param[instance].disp_imm = params->display_delay_enable ? 1 : 0; - if (malone_format != MALONE_FMT_AVC) + if (params->display_delay_enable && + get_padding_scode(SCODE_PADDING_BUFFLUSH, params->codec_format)) + hc->codec_param[instance].disp_imm = 1; + else hc->codec_param[instance].disp_imm = 0; + + if (params->codec_format == V4L2_PIX_FMT_HEVC && !CHECK_VERSION(iface, 1, 9)) + hc->codec_param[instance].disp_imm = 0; + hc->codec_param[instance].dbglog_enable = 0; iface->dbglog_desc.level = 0; @@ -1023,6 +1038,7 @@ static const struct malone_padding_scode padding_scodes[] = { {SCODE_PADDING_EOS, V4L2_PIX_FMT_JPEG, {0x0, 0x0}}, {SCODE_PADDING_BUFFLUSH, V4L2_PIX_FMT_H264, {0x15010000, 0x0}}, {SCODE_PADDING_BUFFLUSH, V4L2_PIX_FMT_H264_MVC, {0x15010000, 0x0}}, + {SCODE_PADDING_BUFFLUSH, V4L2_PIX_FMT_HEVC, {0x3e010000, 0x20}}, }; static const struct malone_padding_scode padding_scode_dft = {0x0, 0x0}; @@ -1057,8 +1073,11 @@ static int vpu_malone_add_padding_scode(struct vpu_buffer *stream_buffer, int ret; ps = get_padding_scode(scode_type, pixelformat); - if (!ps) + if (!ps) { + if (scode_type == SCODE_PADDING_BUFFLUSH) + return 0; return -EINVAL; + } wptr = readl(&str_buf->wptr); if (wptr < stream_buffer->phys || wptr > stream_buffer->phys + stream_buffer->length) -- 2.51.0 From 9ea16ba6eaf93f25f61855751f71e2e701709ddf Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Tue, 1 Apr 2025 17:06:57 +0800 Subject: [PATCH 10/16] media: amphion: Add a frame flush mode for decoder By default the amphion decoder will pre-parse 3 frames before starting to decode the first frame. Alternatively, a block of flush padding data can be appended to the frame, which will ensure that the decoder can start decoding immediately after parsing the flush padding data, thus potentially reducing decoding latency. This mode was previously only enabled, when the display delay was set to 0. Allow the user to manually toggle the use of that mode via a module parameter called low_latency, which enables the mode without changing the display order. Signed-off-by: Ming Qian Reviewed-by: Nicolas Dufresne Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- drivers/media/platform/amphion/vpu_malone.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/amphion/vpu_malone.c b/drivers/media/platform/amphion/vpu_malone.c index e07de425ab16..feca7d4220ed 100644 --- a/drivers/media/platform/amphion/vpu_malone.c +++ b/drivers/media/platform/amphion/vpu_malone.c @@ -26,6 +26,10 @@ #include "vpu_imx8q.h" #include "vpu_malone.h" +static bool low_latency; +module_param(low_latency, bool, 0644); +MODULE_PARM_DESC(low_latency, "Set low latency frame flush mode: 0 (disable) or 1 (enable)"); + #define CMD_SIZE 25600 #define MSG_SIZE 25600 #define CODEC_SIZE 0x1000 @@ -1581,7 +1585,15 @@ static int vpu_malone_input_frame_data(struct vpu_malone_str_buffer __iomem *str vpu_malone_update_wptr(str_buf, wptr); - if (disp_imm && !vpu_vb_is_codecconfig(vbuf)) { + /* + * Enable the low latency flush mode if display delay is set to 0 + * or the low latency frame flush mode if it is set to 1. + * The low latency flush mode requires some padding data to be appended to each frame, + * but there must not be any padding data between the sequence header and the frame. + * This module is currently only supported for the H264 and HEVC formats, + * for other formats, vpu_malone_add_scode() will return 0. + */ + if ((disp_imm || low_latency) && !vpu_vb_is_codecconfig(vbuf)) { ret = vpu_malone_add_scode(inst->core->iface, inst->id, &inst->stream_buffer, -- 2.51.0 From 22f572ce4e7c7fa18f46a445d4270e6d5b5429f2 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:22 +0100 Subject: [PATCH 11/16] media: v4l2-common: Add helpers to calculate bytesperline and sizeimage Add helper functions to calculate plane bytesperline and sizeimage, these new helpers consider bpp div, block width and height when calculating plane bytesperline and sizeimage. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-common.c | 78 +++++++++++++-------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c index e4b2de3833ee..b7e608f1145d 100644 --- a/drivers/media/v4l2-core/v4l2-common.c +++ b/drivers/media/v4l2-core/v4l2-common.c @@ -357,6 +357,34 @@ static inline unsigned int v4l2_format_block_height(const struct v4l2_format_inf return info->block_h[plane]; } +static inline unsigned int v4l2_format_plane_stride(const struct v4l2_format_info *info, int plane, + unsigned int width) +{ + unsigned int hdiv = plane ? info->hdiv : 1; + unsigned int aligned_width = + ALIGN(width, v4l2_format_block_width(info, plane)); + + return DIV_ROUND_UP(aligned_width, hdiv) * + info->bpp[plane] / info->bpp_div[plane]; +} + +static inline unsigned int v4l2_format_plane_height(const struct v4l2_format_info *info, int plane, + unsigned int height) +{ + unsigned int vdiv = plane ? info->vdiv : 1; + unsigned int aligned_height = + ALIGN(height, v4l2_format_block_height(info, plane)); + + return DIV_ROUND_UP(aligned_height, vdiv); +} + +static inline unsigned int v4l2_format_plane_size(const struct v4l2_format_info *info, int plane, + unsigned int width, unsigned int height) +{ + return v4l2_format_plane_stride(info, plane, width) * + v4l2_format_plane_height(info, plane, height); +} + void v4l2_apply_frmsize_constraints(u32 *width, u32 *height, const struct v4l2_frmsize_stepwise *frmsize) { @@ -392,37 +420,19 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, if (info->mem_planes == 1) { plane = &pixfmt->plane_fmt[0]; - plane->bytesperline = ALIGN(width, v4l2_format_block_width(info, 0)) * info->bpp[0] / info->bpp_div[0]; + plane->bytesperline = v4l2_format_plane_stride(info, 0, width); plane->sizeimage = 0; - for (i = 0; i < info->comp_planes; i++) { - unsigned int hdiv = (i == 0) ? 1 : info->hdiv; - unsigned int vdiv = (i == 0) ? 1 : info->vdiv; - unsigned int aligned_width; - unsigned int aligned_height; - - aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); - aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); - - plane->sizeimage += info->bpp[i] * - DIV_ROUND_UP(aligned_width, hdiv) * - DIV_ROUND_UP(aligned_height, vdiv) / info->bpp_div[i]; - } + for (i = 0; i < info->comp_planes; i++) + plane->sizeimage += + v4l2_format_plane_size(info, i, width, height); } else { for (i = 0; i < info->comp_planes; i++) { - unsigned int hdiv = (i == 0) ? 1 : info->hdiv; - unsigned int vdiv = (i == 0) ? 1 : info->vdiv; - unsigned int aligned_width; - unsigned int aligned_height; - - aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); - aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); - plane = &pixfmt->plane_fmt[i]; plane->bytesperline = - info->bpp[i] * DIV_ROUND_UP(aligned_width, hdiv) / info->bpp_div[i]; - plane->sizeimage = - plane->bytesperline * DIV_ROUND_UP(aligned_height, vdiv); + v4l2_format_plane_stride(info, i, width); + plane->sizeimage = plane->bytesperline * + v4l2_format_plane_height(info, i, height); } } return 0; @@ -446,22 +456,12 @@ int v4l2_fill_pixfmt(struct v4l2_pix_format *pixfmt, u32 pixelformat, pixfmt->width = width; pixfmt->height = height; pixfmt->pixelformat = pixelformat; - pixfmt->bytesperline = ALIGN(width, v4l2_format_block_width(info, 0)) * info->bpp[0] / info->bpp_div[0]; + pixfmt->bytesperline = v4l2_format_plane_stride(info, 0, width); pixfmt->sizeimage = 0; - for (i = 0; i < info->comp_planes; i++) { - unsigned int hdiv = (i == 0) ? 1 : info->hdiv; - unsigned int vdiv = (i == 0) ? 1 : info->vdiv; - unsigned int aligned_width; - unsigned int aligned_height; - - aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); - aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); - - pixfmt->sizeimage += info->bpp[i] * - DIV_ROUND_UP(aligned_width, hdiv) * - DIV_ROUND_UP(aligned_height, vdiv) / info->bpp_div[i]; - } + for (i = 0; i < info->comp_planes; i++) + pixfmt->sizeimage += + v4l2_format_plane_size(info, i, width, height); return 0; } EXPORT_SYMBOL_GPL(v4l2_fill_pixfmt); -- 2.51.0 From dcbe2aeda2e09eb69f5feba7e171db2836d9999d Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:23 +0100 Subject: [PATCH 12/16] media: v4l2: Add NV15 and NV20 pixel formats Add NV15 and NV20 pixel formats used by the Rockchip Video Decoder for 10-bit buffers. NV15 and NV20 is 10-bit 4:2:0/4:2:2 semi-planar YUV formats similar to NV12 and NV16, using 10-bit components with no padding between each component. Instead, a group of 4 luminance/chrominance samples are stored over 5 bytes in little endian order: YYYY = UVUV = 4 * 10 bits = 40 bits = 5 bytes The '15' and '20' suffix refers to the optimum effective bits per pixel which is achieved when the total number of luminance samples is a multiple of 8 for NV15 and 4 for NV20. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- .../media/v4l/pixfmt-yuv-planar.rst | 128 ++++++++++++++++++ drivers/media/v4l2-core/v4l2-common.c | 2 + drivers/media/v4l2-core/v4l2-ioctl.c | 2 + include/uapi/linux/videodev2.h | 2 + 4 files changed, 134 insertions(+) diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst index b788f6933855..6e4f399f1f88 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst @@ -137,6 +137,13 @@ All components are stored with the same number of bits per component. - Cb, Cr - No - Linear + * - V4L2_PIX_FMT_NV15 + - 'NV15' + - 10 + - 4:2:0 + - Cb, Cr + - Yes + - Linear * - V4L2_PIX_FMT_NV15_4L4 - 'VT15' - 15 @@ -186,6 +193,13 @@ All components are stored with the same number of bits per component. - Cr, Cb - No - Linear + * - V4L2_PIX_FMT_NV20 + - 'NV20' + - 10 + - 4:2:2 + - Cb, Cr + - Yes + - Linear * - V4L2_PIX_FMT_NV24 - 'NV24' - 8 @@ -302,6 +316,57 @@ of the luma plane. - Cr\ :sub:`11` +.. _V4L2-PIX-FMT-NV15: + +NV15 +---- + +Semi-planar 10-bit YUV 4:2:0 format similar to NV12, using 10-bit components +with no padding between each component. A group of 4 components are stored over +5 bytes in little endian order. + +.. flat-table:: Sample 4x4 NV15 Image (1 byte per cell) + :header-rows: 0 + :stub-columns: 0 + + * - start + 0: + - Y'\ :sub:`00[7:0]` + - Y'\ :sub:`01[5:0]`\ Y'\ :sub:`00[9:8]` + - Y'\ :sub:`02[3:0]`\ Y'\ :sub:`01[9:6]` + - Y'\ :sub:`03[1:0]`\ Y'\ :sub:`02[9:4]` + - Y'\ :sub:`03[9:2]` + * - start + 5: + - Y'\ :sub:`10[7:0]` + - Y'\ :sub:`11[5:0]`\ Y'\ :sub:`10[9:8]` + - Y'\ :sub:`12[3:0]`\ Y'\ :sub:`11[9:6]` + - Y'\ :sub:`13[1:0]`\ Y'\ :sub:`12[9:4]` + - Y'\ :sub:`13[9:2]` + * - start + 10: + - Y'\ :sub:`20[7:0]` + - Y'\ :sub:`21[5:0]`\ Y'\ :sub:`20[9:8]` + - Y'\ :sub:`22[3:0]`\ Y'\ :sub:`21[9:6]` + - Y'\ :sub:`23[1:0]`\ Y'\ :sub:`22[9:4]` + - Y'\ :sub:`23[9:2]` + * - start + 15: + - Y'\ :sub:`30[7:0]` + - Y'\ :sub:`31[5:0]`\ Y'\ :sub:`30[9:8]` + - Y'\ :sub:`32[3:0]`\ Y'\ :sub:`31[9:6]` + - Y'\ :sub:`33[1:0]`\ Y'\ :sub:`32[9:4]` + - Y'\ :sub:`33[9:2]` + * - start + 20: + - Cb\ :sub:`00[7:0]` + - Cr\ :sub:`00[5:0]`\ Cb\ :sub:`00[9:8]` + - Cb\ :sub:`01[3:0]`\ Cr\ :sub:`00[9:6]` + - Cr\ :sub:`01[1:0]`\ Cb\ :sub:`01[9:4]` + - Cr\ :sub:`01[9:2]` + * - start + 25: + - Cb\ :sub:`10[7:0]` + - Cr\ :sub:`10[5:0]`\ Cb\ :sub:`10[9:8]` + - Cb\ :sub:`11[3:0]`\ Cr\ :sub:`10[9:6]` + - Cr\ :sub:`11[1:0]`\ Cb\ :sub:`11[9:4]` + - Cr\ :sub:`11[9:2]` + + .. _V4L2-PIX-FMT-NV12MT: .. _V4L2-PIX-FMT-NV12MT-16X16: .. _V4L2-PIX-FMT-NV12-4L4: @@ -631,6 +696,69 @@ number of lines as the luma plane. - Cr\ :sub:`32` +.. _V4L2-PIX-FMT-NV20: + +NV20 +---- + +Semi-planar 10-bit YUV 4:2:2 format similar to NV16, using 10-bit components +with no padding between each component. A group of 4 components are stored over +5 bytes in little endian order. + +.. flat-table:: Sample 4x4 NV20 Image (1 byte per cell) + :header-rows: 0 + :stub-columns: 0 + + * - start + 0: + - Y'\ :sub:`00[7:0]` + - Y'\ :sub:`01[5:0]`\ Y'\ :sub:`00[9:8]` + - Y'\ :sub:`02[3:0]`\ Y'\ :sub:`01[9:6]` + - Y'\ :sub:`03[1:0]`\ Y'\ :sub:`02[9:4]` + - Y'\ :sub:`03[9:2]` + * - start + 5: + - Y'\ :sub:`10[7:0]` + - Y'\ :sub:`11[5:0]`\ Y'\ :sub:`10[9:8]` + - Y'\ :sub:`12[3:0]`\ Y'\ :sub:`11[9:6]` + - Y'\ :sub:`13[1:0]`\ Y'\ :sub:`12[9:4]` + - Y'\ :sub:`13[9:2]` + * - start + 10: + - Y'\ :sub:`20[7:0]` + - Y'\ :sub:`21[5:0]`\ Y'\ :sub:`20[9:8]` + - Y'\ :sub:`22[3:0]`\ Y'\ :sub:`21[9:6]` + - Y'\ :sub:`23[1:0]`\ Y'\ :sub:`22[9:4]` + - Y'\ :sub:`23[9:2]` + * - start + 15: + - Y'\ :sub:`30[7:0]` + - Y'\ :sub:`31[5:0]`\ Y'\ :sub:`30[9:8]` + - Y'\ :sub:`32[3:0]`\ Y'\ :sub:`31[9:6]` + - Y'\ :sub:`33[1:0]`\ Y'\ :sub:`32[9:4]` + - Y'\ :sub:`33[9:2]` + * - start + 20: + - Cb\ :sub:`00[7:0]` + - Cr\ :sub:`00[5:0]`\ Cb\ :sub:`00[9:8]` + - Cb\ :sub:`01[3:0]`\ Cr\ :sub:`00[9:6]` + - Cr\ :sub:`01[1:0]`\ Cb\ :sub:`01[9:4]` + - Cr\ :sub:`01[9:2]` + * - start + 25: + - Cb\ :sub:`10[7:0]` + - Cr\ :sub:`10[5:0]`\ Cb\ :sub:`10[9:8]` + - Cb\ :sub:`11[3:0]`\ Cr\ :sub:`10[9:6]` + - Cr\ :sub:`11[1:0]`\ Cb\ :sub:`11[9:4]` + - Cr\ :sub:`11[9:2]` + * - start + 30: + - Cb\ :sub:`20[7:0]` + - Cr\ :sub:`20[5:0]`\ Cb\ :sub:`20[9:8]` + - Cb\ :sub:`21[3:0]`\ Cr\ :sub:`20[9:6]` + - Cr\ :sub:`21[1:0]`\ Cb\ :sub:`21[9:4]` + - Cr\ :sub:`21[9:2]` + * - start + 35: + - Cb\ :sub:`30[7:0]` + - Cr\ :sub:`30[5:0]`\ Cb\ :sub:`30[9:8]` + - Cb\ :sub:`31[3:0]`\ Cr\ :sub:`30[9:6]` + - Cr\ :sub:`31[1:0]`\ Cb\ :sub:`31[9:4]` + - Cr\ :sub:`31[9:2]` + + .. _V4L2-PIX-FMT-NV24: .. _V4L2-PIX-FMT-NV42: diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c index b7e608f1145d..aa86b8c6aa75 100644 --- a/drivers/media/v4l2-core/v4l2-common.c +++ b/drivers/media/v4l2-core/v4l2-common.c @@ -277,8 +277,10 @@ const struct v4l2_format_info *v4l2_format_info(u32 format) /* YUV planar formats */ { .format = V4L2_PIX_FMT_NV12, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV21, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, + { .format = V4L2_PIX_FMT_NV15, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 5, 10, 0, 0 }, .bpp_div = { 4, 4, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV16, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV61, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, + { .format = V4L2_PIX_FMT_NV20, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 5, 10, 0, 0 }, .bpp_div = { 4, 4, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV24, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV42, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_P010, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 2, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index a16fb44c7246..e97881f74c0d 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1363,8 +1363,10 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_YUV48_12: descr = "12-bit YUV 4:4:4 Packed"; break; case V4L2_PIX_FMT_NV12: descr = "Y/UV 4:2:0"; break; case V4L2_PIX_FMT_NV21: descr = "Y/VU 4:2:0"; break; + case V4L2_PIX_FMT_NV15: descr = "10-bit Y/UV 4:2:0 (Packed)"; break; case V4L2_PIX_FMT_NV16: descr = "Y/UV 4:2:2"; break; case V4L2_PIX_FMT_NV61: descr = "Y/VU 4:2:2"; break; + case V4L2_PIX_FMT_NV20: descr = "10-bit Y/UV 4:2:2 (Packed)"; break; case V4L2_PIX_FMT_NV24: descr = "Y/UV 4:4:4"; break; case V4L2_PIX_FMT_NV42: descr = "Y/VU 4:4:4"; break; case V4L2_PIX_FMT_P010: descr = "10-bit Y/UV 4:2:0"; break; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index af86ece741e9..ca7b3e8863ca 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -651,8 +651,10 @@ struct v4l2_pix_format { /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21 v4l2_fourcc('N', 'V', '2', '1') /* 12 Y/CrCb 4:2:0 */ +#define V4L2_PIX_FMT_NV15 v4l2_fourcc('N', 'V', '1', '5') /* 15 Y/CbCr 4:2:0 10-bit packed */ #define V4L2_PIX_FMT_NV16 v4l2_fourcc('N', 'V', '1', '6') /* 16 Y/CbCr 4:2:2 */ #define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ +#define V4L2_PIX_FMT_NV20 v4l2_fourcc('N', 'V', '2', '0') /* 20 Y/CbCr 4:2:2 10-bit packed */ #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ #define V4L2_PIX_FMT_P010 v4l2_fourcc('P', '0', '1', '0') /* 24 Y/CbCr 4:2:0 10-bit per component */ -- 2.51.0 From d5e0aa61470c48ddc04d433a00e79cef8716377a Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:24 +0100 Subject: [PATCH 13/16] media: rkvdec: h264: Use bytesperline and buffer height as virstride Use bytesperline and buffer height to calculate the strides configured. This does not really change anything other than ensuring the bytesperline that is signaled to userspace matches what is configured in HW. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/staging/media/rkvdec/rkvdec-h264.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c index 4fc167b42cf0..7a1e76d423df 100644 --- a/drivers/staging/media/rkvdec/rkvdec-h264.c +++ b/drivers/staging/media/rkvdec/rkvdec-h264.c @@ -896,9 +896,9 @@ static void config_registers(struct rkvdec_ctx *ctx, dma_addr_t rlc_addr; dma_addr_t refer_addr; u32 rlc_len; - u32 hor_virstride = 0; - u32 ver_virstride = 0; - u32 y_virstride = 0; + u32 hor_virstride; + u32 ver_virstride; + u32 y_virstride; u32 yuv_virstride = 0; u32 offset; dma_addr_t dst_addr; @@ -909,16 +909,16 @@ static void config_registers(struct rkvdec_ctx *ctx, f = &ctx->decoded_fmt; dst_fmt = &f->fmt.pix_mp; - hor_virstride = (sps->bit_depth_luma_minus8 + 8) * dst_fmt->width / 8; - ver_virstride = round_up(dst_fmt->height, 16); + hor_virstride = dst_fmt->plane_fmt[0].bytesperline; + ver_virstride = dst_fmt->height; y_virstride = hor_virstride * ver_virstride; if (sps->chroma_format_idc == 0) yuv_virstride = y_virstride; else if (sps->chroma_format_idc == 1) - yuv_virstride += y_virstride + y_virstride / 2; + yuv_virstride = y_virstride + y_virstride / 2; else if (sps->chroma_format_idc == 2) - yuv_virstride += 2 * y_virstride; + yuv_virstride = 2 * y_virstride; reg = RKVDEC_Y_HOR_VIRSTRIDE(hor_virstride / 16) | RKVDEC_UV_HOR_VIRSTRIDE(hor_virstride / 16) | -- 2.51.0 From 137149c63993c235fa37624cac6368f3cb4affc9 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 25 Feb 2025 10:40:25 +0100 Subject: [PATCH 14/16] media: rkvdec: h264: Don't hardcode SPS/PPS parameters Some SPS/PPS parameters are currently hardcoded in the driver even though they exist in the stable uapi controls. Use values from SPS/PPS controls instead of hardcoding them. Signed-off-by: Alex Bee [jonas@kwiboo.se: constraint_set_flags condition, commit message] Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/staging/media/rkvdec/rkvdec-h264.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c index 7a1e76d423df..8bce8902b8dd 100644 --- a/drivers/staging/media/rkvdec/rkvdec-h264.c +++ b/drivers/staging/media/rkvdec/rkvdec-h264.c @@ -655,13 +655,14 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, #define WRITE_PPS(value, field) set_ps_field(hw_ps->info, field, value) /* write sps */ - WRITE_PPS(0xf, SEQ_PARAMETER_SET_ID); - WRITE_PPS(0xff, PROFILE_IDC); - WRITE_PPS(1, CONSTRAINT_SET3_FLAG); + WRITE_PPS(sps->seq_parameter_set_id, SEQ_PARAMETER_SET_ID); + WRITE_PPS(sps->profile_idc, PROFILE_IDC); + WRITE_PPS(!!(sps->constraint_set_flags & (1 << 3)), CONSTRAINT_SET3_FLAG); WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC); WRITE_PPS(sps->bit_depth_luma_minus8, BIT_DEPTH_LUMA); WRITE_PPS(sps->bit_depth_chroma_minus8, BIT_DEPTH_CHROMA); - WRITE_PPS(0, QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG); + WRITE_PPS(!!(sps->flags & V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS), + QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG); WRITE_PPS(sps->log2_max_frame_num_minus4, LOG2_MAX_FRAME_NUM_MINUS4); WRITE_PPS(sps->max_num_ref_frames, MAX_NUM_REF_FRAMES); WRITE_PPS(sps->pic_order_cnt_type, PIC_ORDER_CNT_TYPE); @@ -688,8 +689,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, DIRECT_8X8_INFERENCE_FLAG); /* write pps */ - WRITE_PPS(0xff, PIC_PARAMETER_SET_ID); - WRITE_PPS(0x1f, PPS_SEQ_PARAMETER_SET_ID); + WRITE_PPS(pps->pic_parameter_set_id, PIC_PARAMETER_SET_ID); + WRITE_PPS(pps->seq_parameter_set_id, PPS_SEQ_PARAMETER_SET_ID); WRITE_PPS(!!(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE), ENTROPY_CODING_MODE_FLAG); WRITE_PPS(!!(pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT), -- 2.51.0 From c74b78193ffd0a5cfdc7e0c3fa1d128e6c0c42d6 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:26 +0100 Subject: [PATCH 15/16] media: rkvdec: Extract rkvdec_fill_decoded_pixfmt into helper Extract call to v4l2_fill_pixfmt_mp() and ajusting of sizeimage into a helper. Replace current code with a call to the new helper. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/staging/media/rkvdec/rkvdec.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index f9bef5173bf2..e354360f4acc 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -27,6 +27,16 @@ #include "rkvdec.h" #include "rkvdec-regs.h" +static void rkvdec_fill_decoded_pixfmt(struct rkvdec_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp) +{ + v4l2_fill_pixfmt_mp(pix_mp, pix_mp->pixelformat, + pix_mp->width, pix_mp->height); + pix_mp->plane_fmt[0].sizeimage += 128 * + DIV_ROUND_UP(pix_mp->width, 16) * + DIV_ROUND_UP(pix_mp->height, 16); +} + static int rkvdec_try_ctrl(struct v4l2_ctrl *ctrl) { struct rkvdec_ctx *ctx = container_of(ctrl->handler, struct rkvdec_ctx, ctrl_hdl); @@ -192,13 +202,9 @@ static void rkvdec_reset_decoded_fmt(struct rkvdec_ctx *ctx) rkvdec_reset_fmt(ctx, f, ctx->coded_fmt_desc->decoded_fmts[0]); f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; - v4l2_fill_pixfmt_mp(&f->fmt.pix_mp, - ctx->coded_fmt_desc->decoded_fmts[0], - ctx->coded_fmt.fmt.pix_mp.width, - ctx->coded_fmt.fmt.pix_mp.height); - f->fmt.pix_mp.plane_fmt[0].sizeimage += 128 * - DIV_ROUND_UP(f->fmt.pix_mp.width, 16) * - DIV_ROUND_UP(f->fmt.pix_mp.height, 16); + f->fmt.pix_mp.width = ctx->coded_fmt.fmt.pix_mp.width; + f->fmt.pix_mp.height = ctx->coded_fmt.fmt.pix_mp.height; + rkvdec_fill_decoded_pixfmt(ctx, &f->fmt.pix_mp); } static int rkvdec_enum_framesizes(struct file *file, void *priv, @@ -264,12 +270,7 @@ static int rkvdec_try_capture_fmt(struct file *file, void *priv, &pix_mp->height, &coded_desc->frmsize); - v4l2_fill_pixfmt_mp(pix_mp, pix_mp->pixelformat, - pix_mp->width, pix_mp->height); - pix_mp->plane_fmt[0].sizeimage += - 128 * - DIV_ROUND_UP(pix_mp->width, 16) * - DIV_ROUND_UP(pix_mp->height, 16); + rkvdec_fill_decoded_pixfmt(ctx, pix_mp); pix_mp->field = V4L2_FIELD_NONE; return 0; -- 2.51.0 From 98a0aa1b6910766a23b5162a1499696837e5d3ca Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:27 +0100 Subject: [PATCH 16/16] media: rkvdec: Move rkvdec_reset_decoded_fmt helper Move rkvdec_reset_decoded_fmt() and the called rkvdec_reset_fmt() helper functions in preparation for adding a new caller in an upcoming patch. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/staging/media/rkvdec/rkvdec.c | 46 +++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index e354360f4acc..1f8f98cf91dc 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -37,6 +37,29 @@ static void rkvdec_fill_decoded_pixfmt(struct rkvdec_ctx *ctx, DIV_ROUND_UP(pix_mp->height, 16); } +static void rkvdec_reset_fmt(struct rkvdec_ctx *ctx, struct v4l2_format *f, + u32 fourcc) +{ + memset(f, 0, sizeof(*f)); + f->fmt.pix_mp.pixelformat = fourcc; + f->fmt.pix_mp.field = V4L2_FIELD_NONE; + f->fmt.pix_mp.colorspace = V4L2_COLORSPACE_REC709; + f->fmt.pix_mp.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; + f->fmt.pix_mp.quantization = V4L2_QUANTIZATION_DEFAULT; + f->fmt.pix_mp.xfer_func = V4L2_XFER_FUNC_DEFAULT; +} + +static void rkvdec_reset_decoded_fmt(struct rkvdec_ctx *ctx) +{ + struct v4l2_format *f = &ctx->decoded_fmt; + + rkvdec_reset_fmt(ctx, f, ctx->coded_fmt_desc->decoded_fmts[0]); + f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + f->fmt.pix_mp.width = ctx->coded_fmt.fmt.pix_mp.width; + f->fmt.pix_mp.height = ctx->coded_fmt.fmt.pix_mp.height; + rkvdec_fill_decoded_pixfmt(ctx, &f->fmt.pix_mp); +} + static int rkvdec_try_ctrl(struct v4l2_ctrl *ctrl) { struct rkvdec_ctx *ctx = container_of(ctrl->handler, struct rkvdec_ctx, ctrl_hdl); @@ -169,18 +192,6 @@ rkvdec_find_coded_fmt_desc(u32 fourcc) return NULL; } -static void rkvdec_reset_fmt(struct rkvdec_ctx *ctx, struct v4l2_format *f, - u32 fourcc) -{ - memset(f, 0, sizeof(*f)); - f->fmt.pix_mp.pixelformat = fourcc; - f->fmt.pix_mp.field = V4L2_FIELD_NONE; - f->fmt.pix_mp.colorspace = V4L2_COLORSPACE_REC709; - f->fmt.pix_mp.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; - f->fmt.pix_mp.quantization = V4L2_QUANTIZATION_DEFAULT; - f->fmt.pix_mp.xfer_func = V4L2_XFER_FUNC_DEFAULT; -} - static void rkvdec_reset_coded_fmt(struct rkvdec_ctx *ctx) { struct v4l2_format *f = &ctx->coded_fmt; @@ -196,17 +207,6 @@ static void rkvdec_reset_coded_fmt(struct rkvdec_ctx *ctx) ctx->coded_fmt_desc->ops->adjust_fmt(ctx, f); } -static void rkvdec_reset_decoded_fmt(struct rkvdec_ctx *ctx) -{ - struct v4l2_format *f = &ctx->decoded_fmt; - - rkvdec_reset_fmt(ctx, f, ctx->coded_fmt_desc->decoded_fmts[0]); - f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; - f->fmt.pix_mp.width = ctx->coded_fmt.fmt.pix_mp.width; - f->fmt.pix_mp.height = ctx->coded_fmt.fmt.pix_mp.height; - rkvdec_fill_decoded_pixfmt(ctx, &f->fmt.pix_mp); -} - static int rkvdec_enum_framesizes(struct file *file, void *priv, struct v4l2_frmsizeenum *fsize) { -- 2.51.0