When the file descriptor is closed while a job is still running,
there's a race condition between the job completion callback and the
file descriptor cleanup. This can lead to accessing freed memory when
updating per-fd GPU stats, such as the following example:
[56120.512903] Unable to handle kernel paging request at virtual address
0000330a92b9688a
[56120.520881] Mem abort info:
[56120.523687] ESR = 0x0000000096000005
[56120.527454] EC = 0x25: DABT (current EL), IL = 32 bits
[56120.532785] SET = 0, FnV = 0
[56120.535847] EA = 0, S1PTW = 0
[56120.538995] FSC = 0x05: level 1 translation fault
[56120.543891] Data abort info:
[56120.546778] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
[56120.552289] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[56120.557362] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[56120.562690] user pgtable: 16k pages, 47-bit VAs, pgdp=
0000000023f54000
[56120.569239] [
0000330a92b9688a] pgd=
0000000000000000, p4d=
0000000000000000, pud=
0000000000000000
[56120.577975] Internal error: Oops:
0000000096000005 [#1] PREEMPT SMP
CPU: 0 UID: 1000 PID:
1497409 Comm: mpv Not tainted 6.12.37-ncvm5+ #1
Hardware name: Raspberry Pi 5 Model B Rev 1.0 (DT)
pstate:
604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : v3d_job_update_stats+0x64/0x168 [v3d]
lr : v3d_job_update_stats+0x40/0x168 [v3d]
sp :
ffffc00080003e60
x29:
ffffc00080003e60 x28:
ffff800002860000 x27:
0000000000000000
x26:
0000000000000000 x25:
ffff800002860000 x24:
ffff800002630800
x23:
ffff800060786000 x22:
0000330a933c31fb x21:
0000000000000001
x20:
0000330a92b96302 x19:
ffff800060786b10 x18:
0000000000000000
x17:
ffffaf90506a0000 x16:
ffffd06fce57c360 x15:
0000000000000000
x14:
0000000000000000 x13:
0000000000000000 x12:
0000000000000000
x11:
0000000000000000 x10:
0000000000000000 x9 :
ffffd06f5d0fec40
x8 :
0000000000000000 x7 :
0000000000000000 x6 :
000002978dbd535a
x5 :
00ffffffffffffff x4 :
0000000000000015 x3 :
0000300001fddf88
x2 :
0000000000000020 x1 :
0000000000010001 x0 :
0000330a92b96872
Call trace:
v3d_job_update_stats+0x64/0x168 [v3d]
v3d_irq+0x118/0x2e0 [v3d]
__handle_irq_event_percpu+0x60/0x220
Fix such an issue by protecting all accesses to `job->file_priv` with
the queue's lock. With that, we can clear `job->file_priv` before the
V3D per-fd structure is freed and assure that `job->file_priv` exists
during the per-fd GPU stats updates.
Fixes: e1bc3a13bd77 ("drm/v3d: Avoid NULL pointer dereference in `v3d_job_update_stats()`")
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://lore.kernel.org/r/20250826-v3d-queue-lock-v3-4-979efc43e490@igalia.com
Signed-off-by: Maíra Canal <mcanal@igalia.com>
static void
v3d_postclose(struct drm_device *dev, struct drm_file *file)
{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_file_priv *v3d_priv = file->driver_priv;
+ unsigned long irqflags;
enum v3d_queue q;
- for (q = 0; q < V3D_MAX_QUEUES; q++)
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ struct v3d_queue_state *queue = &v3d->queue[q];
+ struct v3d_job *job = queue->active_job;
+
drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
+ if (job && job->base.entity == &v3d_priv->sched_entity[q]) {
+ spin_lock_irqsave(&queue->queue_lock, irqflags);
+ job->file_priv = NULL;
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
+ }
+ }
+
v3d_perfmon_close_file(v3d_priv);
kfree(v3d_priv);
}
unsigned int count);
void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
unsigned int count);
-void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue);
+void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q);
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
}
void
-v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
+v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q)
{
struct v3d_dev *v3d = job->v3d;
- struct v3d_file_priv *file = job->file_priv;
- struct v3d_stats *global_stats = &v3d->queue[queue].stats;
+ struct v3d_queue_state *queue = &v3d->queue[q];
+ struct v3d_stats *global_stats = &queue->stats;
u64 now = local_clock();
unsigned long flags;
preempt_disable();
/* Don't update the local stats if the file context has already closed */
- if (file)
- v3d_stats_update(&file->stats[queue], now);
- else
- drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n");
+ spin_lock(&queue->queue_lock);
+ if (job->file_priv)
+ v3d_stats_update(&job->file_priv->stats[q], now);
+ spin_unlock(&queue->queue_lock);
v3d_stats_update(global_stats, now);