From 80fd814e93f10fe60d02d8a31c46718ff9e9edec Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Tue, 10 Sep 2024 17:10:45 +0200 Subject: [PATCH 01/16] MAINTAINERS: Add myself as VKMS Maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit I've been actively working on VKMS to provide new features and participated in reviews and testing. To help Maìra with her work, add myself as co-maintainer of VKMS. Signed-off-by: Louis Chauvet Acked-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240910-vkms-maintainer-v1-1-e7a6c7a4ae71@bootlin.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 16a27a0abb9d..3340ccc37c29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7347,6 +7347,7 @@ F: drivers/gpu/drm/udl/ DRM DRIVER FOR VIRTUAL KERNEL MODESETTING (VKMS) M: Maíra Canal +M: Louis Chauvet R: Haneen Mohammed R: Simona Vetter R: Melissa Wen -- 2.51.0 From 6e5eb6dcb3042adcda432a3498d0d62d91588c4d Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Thu, 5 Sep 2024 15:27:06 +0200 Subject: [PATCH 02/16] drm/vkms: Add documentation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add documentation around vkms_output and its initialization. Add some documentation on pixel conversion functions. Update of outdated comments for pixel_write functions. Reviewed-by: José Expósito Signed-off-by: Louis Chauvet Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240905-google-clarifications-v4-1-e43c1f2e3d87@bootlin.com --- drivers/gpu/drm/vkms/vkms_composer.c | 7 ++ drivers/gpu/drm/vkms/vkms_drv.h | 101 +++++++++++++++++++++++---- drivers/gpu/drm/vkms/vkms_formats.c | 62 +++++++++++++--- drivers/gpu/drm/vkms/vkms_output.c | 14 +++- 4 files changed, 161 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index e7441b227b3c..57a5769fc994 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -189,6 +189,13 @@ static void blend(struct vkms_writeback_job *wb, size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; + /* + * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary + * complexity to avoid poor blending performance. + * + * The function vkms_compose_row() is used to read a line, pixel-by-pixel, into the staging + * buffer. + */ for (size_t y = 0; y < crtc_y_limit; y++) { fill_background(&background_color, output_buffer); diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 5e46ea5b96dc..672fe191e239 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -25,6 +25,17 @@ #define VKMS_LUT_SIZE 256 +/** + * struct vkms_frame_info - Structure to store the state of a frame + * + * @fb: backing drm framebuffer + * @src: source rectangle of this frame in the source framebuffer, stored in 16.16 fixed-point form + * @dst: destination rectangle in the crtc buffer, stored in whole pixel units + * @map: see @drm_shadow_plane_state.data + * @rotation: rotation applied to the source. + * + * @src and @dst should have the same size modulo the rotation. + */ struct vkms_frame_info { struct drm_framebuffer *fb; struct drm_rect src, dst; @@ -52,9 +63,11 @@ struct vkms_writeback_job { }; /** - * vkms_plane_state - Driver specific plane state + * struct vkms_plane_state - Driver specific plane state * @base: base plane state * @frame_info: data required for composing computation + * @pixel_read: function to read a pixel in this plane. The creator of a struct vkms_plane_state + * must ensure that this pointer is valid */ struct vkms_plane_state { struct drm_shadow_plane_state base; @@ -73,29 +86,56 @@ struct vkms_color_lut { }; /** - * vkms_crtc_state - Driver specific CRTC state + * struct vkms_crtc_state - Driver specific CRTC state + * * @base: base CRTC state * @composer_work: work struct to compose and add CRC entries - * @n_frame_start: start frame number for computed CRC - * @n_frame_end: end frame number for computed CRC + * + * @num_active_planes: Number of active planes + * @active_planes: List containing all the active planes (counted by + * @num_active_planes). They should be stored in z-order. + * @active_writeback: Current active writeback job + * @gamma_lut: Look up table for gamma used in this CRTC + * @crc_pending: Protected by @vkms_output.composer_lock, true when the frame CRC is not computed + * yet. Used by vblank to detect if the composer is too slow. + * @wb_pending: Protected by @vkms_output.composer_lock, true when a writeback frame is requested. + * @frame_start: Protected by @vkms_output.composer_lock, saves the frame number before the start + * of the composition process. + * @frame_end: Protected by @vkms_output.composer_lock, saves the last requested frame number. + * This is used to generate enough CRC entries when the composition worker is too slow. */ struct vkms_crtc_state { struct drm_crtc_state base; struct work_struct composer_work; int num_active_planes; - /* stack of active planes for crc computation, should be in z order */ struct vkms_plane_state **active_planes; struct vkms_writeback_job *active_writeback; struct vkms_color_lut gamma_lut; - /* below four are protected by vkms_output.composer_lock */ bool crc_pending; bool wb_pending; u64 frame_start; u64 frame_end; }; +/** + * struct vkms_output - Internal representation of all output components in VKMS + * + * @crtc: Base CRTC in DRM + * @encoder: DRM encoder used for this output + * @connector: DRM connector used for this output + * @wb_connecter: DRM writeback connector used for this output + * @vblank_hrtimer: Timer used to trigger the vblank + * @period_ns: vblank period, in nanoseconds, used to configure @vblank_hrtimer and to compute + * vblank timestamps + * @composer_workq: Ordered workqueue for @composer_state.composer_work. + * @lock: Lock used to protect concurrent access to the composer + * @composer_enabled: Protected by @lock, true when the VKMS composer is active (crc needed or + * writeback) + * @composer_state: Protected by @lock, current state of this VKMS output + * @composer_lock: Lock used internally to protect @composer_state members + */ struct vkms_output { struct drm_crtc crtc; struct drm_encoder encoder; @@ -103,28 +143,38 @@ struct vkms_output { struct drm_writeback_connector wb_connector; struct hrtimer vblank_hrtimer; ktime_t period_ns; - /* ordered wq for composer_work */ struct workqueue_struct *composer_workq; - /* protects concurrent access to composer */ spinlock_t lock; - /* protected by @lock */ bool composer_enabled; struct vkms_crtc_state *composer_state; spinlock_t composer_lock; }; -struct vkms_device; - +/** + * struct vkms_config - General configuration for VKMS driver + * + * @writeback: If true, a writeback buffer can be attached to the CRTC + * @cursor: If true, a cursor plane is created in the VKMS device + * @overlay: If true, NUM_OVERLAY_PLANES will be created for the VKMS device + * @dev: Used to store the current VKMS device. Only set when the device is instantiated. + */ struct vkms_config { bool writeback; bool cursor; bool overlay; - /* only set when instantiated */ struct vkms_device *dev; }; +/** + * struct vkms_device - Description of a VKMS device + * + * @drm - Base device in DRM + * @platform - Associated platform device + * @output - Configuration and sub-components of the VKMS device + * @config: Configuration used in this VKMS device + */ struct vkms_device { struct drm_device drm; struct platform_device *platform; @@ -132,6 +182,10 @@ struct vkms_device { const struct vkms_config *config; }; +/* + * The following helpers are used to convert a member of a struct into its parent. + */ + #define drm_crtc_to_vkms_output(target) \ container_of(target, struct vkms_output, crtc) @@ -144,12 +198,33 @@ struct vkms_device { #define to_vkms_plane_state(target)\ container_of(target, struct vkms_plane_state, base.base) -/* CRTC */ +/** + * vkms_crtc_init() - Initialize a CRTC for VKMS + * @dev: DRM device associated with the VKMS buffer + * @crtc: uninitialized CRTC device + * @primary: primary plane to attach to the CRTC + * @cursor: plane to attach to the CRTC + */ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor); +/** + * vkms_output_init() - Initialize all sub-components needed for a VKMS device. + * + * @vkmsdev: VKMS device to initialize + * @index: CRTC which can be attached to the planes. The caller must ensure that + * @index is positive and less or equals to 31. + */ int vkms_output_init(struct vkms_device *vkmsdev, int index); +/** + * vkms_plane_init() - Initialize a plane + * + * @vkmsdev: VKMS device containing the plane + * @type: type of plane to initialize + * @index: CRTC which can be attached to the plane. The caller must ensure that + * @index is positive and less or equals to 31. + */ struct vkms_plane *vkms_plane_init(struct vkms_device *vkmsdev, enum drm_plane_type type, int index); diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 040b7f113a3b..e8a5cc235ebb 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -9,24 +9,40 @@ #include "vkms_formats.h" +/** + * pixel_offset() - Get the offset of the pixel at coordinates x/y in the first plane + * + * @frame_info: Buffer metadata + * @x: The x coordinate of the wanted pixel in the buffer + * @y: The y coordinate of the wanted pixel in the buffer + * + * The caller must ensure that the framebuffer associated with this request uses a pixel format + * where block_h == block_w == 1. + * If this requirement is not fulfilled, the resulting offset can point to an other pixel or + * outside of the buffer. + */ static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) { return frame_info->offset + (y * frame_info->pitch) + (x * frame_info->cpp); } -/* - * packed_pixels_addr - Get the pointer to pixel of a given pair of coordinates +/** + * packed_pixels_addr() - Get the pointer to the block containing the pixel at the given + * coordinates * * @frame_info: Buffer metadata - * @x: The x(width) coordinate of the 2D buffer - * @y: The y(Heigth) coordinate of the 2D buffer + * @x: The x (width) coordinate inside the plane + * @y: The y (height) coordinate inside the plane * * Takes the information stored in the frame_info, a pair of coordinates, and * returns the address of the first color channel. * This function assumes the channels are packed together, i.e. a color channel * comes immediately after another in the memory. And therefore, this function * doesn't work for YUV with chroma subsampling (e.g. YUV420 and NV21). + * + * The caller must ensure that the framebuffer associated with this request uses a pixel format + * where block_h == block_w == 1, otherwise the returned pointer can be outside the buffer. */ static void *packed_pixels_addr(const struct vkms_frame_info *frame_info, int x, int y) @@ -51,6 +67,13 @@ static int get_x_position(const struct vkms_frame_info *frame_info, int limit, i return x; } +/* + * The following functions take pixel data from the buffer and convert them to the format + * ARGB16161616 in @out_pixel. + * + * They are used in the vkms_compose_row() function to handle multiple formats. + */ + static void ARGB8888_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) { /* @@ -143,12 +166,11 @@ void vkms_compose_row(struct line_buffer *stage_buffer, struct vkms_plane_state } /* - * The following functions take an line of argb_u16 pixels from the - * src_buffer, convert them to a specific format, and store them in the - * destination. + * The following functions take one &struct pixel_argb_u16 and convert it to a specific format. + * The result is stored in @dst_pixels. * - * They are used in the `compose_active_planes` to convert and store a line - * from the src_buffer to the writeback buffer. + * They are used in vkms_writeback_row() to convert and store a pixel from the src_buffer to + * the writeback buffer. */ static void argb_u16_to_ARGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) { @@ -214,6 +236,14 @@ static void argb_u16_to_RGB565(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) *pixels = cpu_to_le16(r << 11 | g << 5 | b); } +/** + * vkms_writeback_row() - Generic loop for all supported writeback format. It is executed just + * after the blending to write a line in the writeback buffer. + * + * @wb: Job where to insert the final image + * @src_buffer: Line to write + * @y: Row to write in the writeback buffer + */ void vkms_writeback_row(struct vkms_writeback_job *wb, const struct line_buffer *src_buffer, int y) { @@ -227,6 +257,13 @@ void vkms_writeback_row(struct vkms_writeback_job *wb, wb->pixel_write(dst_pixels, &in_pixels[x]); } +/** + * get_pixel_conversion_function() - Retrieve the correct read_pixel function for a specific + * format. The returned pointer is NULL for unsupported pixel formats. The caller must ensure that + * the pointer is valid before using it in a vkms_plane_state. + * + * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) + */ void *get_pixel_conversion_function(u32 format) { switch (format) { @@ -245,6 +282,13 @@ void *get_pixel_conversion_function(u32 format) } } +/** + * get_pixel_write_function() - Retrieve the correct write_pixel function for a specific format. + * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the + * pointer is valid before using it in a vkms_writeback_job. + * + * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) + */ void *get_pixel_write_function(u32 format) { switch (format) { diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 5ce70dd946aa..0a5a185aa0b0 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -21,6 +21,7 @@ static int vkms_conn_get_modes(struct drm_connector *connector) { int count; + /* Use the default modes list from DRM */ count = drm_add_modes_noedid(connector, XRES_MAX, YRES_MAX); drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF); @@ -58,6 +59,12 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) int writeback; unsigned int n; + /* + * Initialize used plane. One primary plane is required to perform the composition. + * + * The overlay and cursor planes are not mandatory, but can be used to perform complex + * composition. + */ primary = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_PRIMARY, index); if (IS_ERR(primary)) return PTR_ERR(primary); @@ -76,6 +83,7 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) return PTR_ERR(cursor); } + /* [1]: Allocation of a CRTC, its index will be BIT(0) = 1 */ ret = vkms_crtc_init(dev, crtc, &primary->base, &cursor->base); if (ret) return ret; @@ -95,7 +103,11 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) DRM_ERROR("Failed to init encoder\n"); goto err_encoder; } - encoder->possible_crtcs = 1; + /* + * This is a hardcoded value to select crtc for the encoder. + * BIT(0) here designate the first registered CRTC, the one allocated in [1] + */ + encoder->possible_crtcs = BIT(0); ret = drm_connector_attach_encoder(connector, encoder); if (ret) { -- 2.51.0 From 2b75feb312894f87d0539d46eeb364f447df723f Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Thu, 12 Sep 2024 17:25:33 +0200 Subject: [PATCH 03/16] drm/vkms: Suppress context imbalance detected by sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The functions `vkms_crtc_atomic_begin` and `vkms_crtc_atomic_flush` are responsible for locking and unlocking a mutex, respectively. Add the `__acquires` and `__releases` annotations to these functions to prevent the associated sparse warning about context imbalance. Signed-off-by: Louis Chauvet Reviewed-by: José Expósito Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240912-vkms-warnings-v1-1-59f3e13ea8e5@bootlin.com --- drivers/gpu/drm/vkms/vkms_crtc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 40b4d084e3ce..2ad164b518fb 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -232,6 +232,7 @@ static void vkms_crtc_atomic_disable(struct drm_crtc *crtc, static void vkms_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state) + __acquires(&vkms_output->lock) { struct vkms_output *vkms_output = drm_crtc_to_vkms_output(crtc); @@ -243,6 +244,7 @@ static void vkms_crtc_atomic_begin(struct drm_crtc *crtc, static void vkms_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) + __releases(&vkms_output->lock) { struct vkms_output *vkms_output = drm_crtc_to_vkms_output(crtc); -- 2.51.0 From 2f7bd9d66e638e1ce4a20fea83c9d44c25c3dae8 Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Fri, 6 Sep 2024 18:15:03 +0200 Subject: [PATCH 04/16] drm/vkms: Add missing check for CRTC initialization MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit CRTC initialization call drm_mode_crtc_set_gamma_size without the proper checks, introduce this check to avoid issues. Signed-off-by: Louis Chauvet Reviewed-by: José Expósito Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240906-vkms-add-missing-check-v1-1-1afb3bf3d0a6@bootlin.com --- drivers/gpu/drm/vkms/vkms_crtc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 2ad164b518fb..a40295c18b48 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -289,7 +289,12 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs); - drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); + ret = drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); + if (ret) { + DRM_ERROR("Failed to set gamma size\n"); + return ret; + } + drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE); spin_lock_init(&vkms_out->lock); -- 2.51.0 From cf1becb7f996a0a23ea2c270cf6bb0911ec3ca1a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:05 -0300 Subject: [PATCH 05/16] drm/v3d: Address race-condition in MMU flush MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We must first flush the MMU cache and then, flush the TLB, not the other way around. Currently, we can see a race condition between the MMU cache and the TLB when running multiple rendering processes at the same time. This is evidenced by MMU errors triggered by the IRQ. Fix the MMU flush order by flushing the MMU cache and then the TLB. Also, in order to address the race condition, wait for the MMU cache flush to finish before starting the TLB flush. Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-2-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_mmu.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 14f3af40d6f6..e36ec3343b06 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -32,32 +32,23 @@ static int v3d_mmu_flush_all(struct v3d_dev *v3d) { int ret; - /* Make sure that another flush isn't already running when we - * start this one. - */ - ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & - V3D_MMU_CTL_TLB_CLEARING), 100); - if (ret) - dev_err(v3d->drm.dev, "TLB clear wait idle pre-wait failed\n"); - - V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | - V3D_MMU_CTL_TLB_CLEAR); - - V3D_WRITE(V3D_MMUC_CONTROL, - V3D_MMUC_CONTROL_FLUSH | + V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_FLUSH | V3D_MMUC_CONTROL_ENABLE); - ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & - V3D_MMU_CTL_TLB_CLEARING), 100); + ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & + V3D_MMUC_CONTROL_FLUSHING), 100); if (ret) { - dev_err(v3d->drm.dev, "TLB clear wait idle failed\n"); + dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n"); return ret; } - ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & - V3D_MMUC_CONTROL_FLUSHING), 100); + V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | + V3D_MMU_CTL_TLB_CLEAR); + + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); if (ret) - dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n"); + dev_err(v3d->drm.dev, "MMU TLB clear wait idle failed\n"); return ret; } -- 2.51.0 From d2fb8811108b2c1285c56f4fba4fff8fe3525593 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:06 -0300 Subject: [PATCH 06/16] drm/v3d: Flush the MMU before we supply more memory to the binner MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We must ensure that the MMU is flushed before we supply more memory to the binner, otherwise we might end up with invalid MMU accesses by the GPU. Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-3-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_irq.c | 2 ++ drivers/gpu/drm/v3d/v3d_mmu.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index cf4b23369dc4..75b4725d49c7 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -553,6 +553,7 @@ void v3d_irq_disable(struct v3d_dev *v3d); void v3d_irq_reset(struct v3d_dev *v3d); /* v3d_mmu.c */ +int v3d_mmu_flush_all(struct v3d_dev *v3d); int v3d_mmu_set_page_table(struct v3d_dev *v3d); void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index d469bda52c1a..20bf33702c3c 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -70,6 +70,8 @@ v3d_overflow_mem_work(struct work_struct *work) list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); spin_unlock_irqrestore(&v3d->job_lock, irqflags); + v3d_mmu_flush_all(v3d); + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << V3D_MMU_PAGE_SHIFT); V3D_CORE_WRITE(0, V3D_PTB_BPOS, obj->size); diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index e36ec3343b06..5bb7821c0243 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -28,7 +28,7 @@ #define V3D_PTE_WRITEABLE BIT(29) #define V3D_PTE_VALID BIT(28) -static int v3d_mmu_flush_all(struct v3d_dev *v3d) +int v3d_mmu_flush_all(struct v3d_dev *v3d) { int ret; -- 2.51.0 From 56cf76ed784fe5347c9305f7f0f3d0f9cb8d8a3b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:07 -0300 Subject: [PATCH 07/16] drm/v3d: Fix return if scheduler initialization fails MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If the scheduler initialization fails, GEM initialization must fail as well. Therefore, if `v3d_sched_init()` fails, free the DMA memory allocated and return the error value in `v3d_gem_init()`. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-4-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index da8faf3b9011..b3b76332f2c5 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -291,8 +291,9 @@ v3d_gem_init(struct drm_device *dev) ret = v3d_sched_init(v3d); if (ret) { drm_mm_takedown(&v3d->mm); - dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt, + dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt, v3d->pt_paddr); + return ret; } return 0; -- 2.51.0 From 0992b2541e1cd9580c2e70fab7a78558de054bae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:08 -0300 Subject: [PATCH 08/16] drm/gem: Create a drm_gem_object_init_with_mnt() function MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit For some applications, such as applications that uses huge pages, we might want to have a different mountpoint, for which we pass mount flags that better match our usecase. Therefore, create a new function `drm_gem_object_init_with_mnt()` that allow us to define the tmpfs mountpoint where the GEM object will be created. If this parameter is NULL, then we fallback to `shmem_file_setup()`. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-5-mcanal@igalia.com --- drivers/gpu/drm/drm_gem.c | 34 ++++++++++++++++++++++++++++++---- include/drm/drm_gem.h | 3 +++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 149b8e25da5b..ee811764c3df 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -114,22 +114,32 @@ drm_gem_init(struct drm_device *dev) } /** - * drm_gem_object_init - initialize an allocated shmem-backed GEM object + * drm_gem_object_init_with_mnt - initialize an allocated shmem-backed GEM + * object in a given shmfs mountpoint + * * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size + * @gemfs: tmpfs mount where the GEM object will be created. If NULL, use + * the usual tmpfs mountpoint (`shm_mnt`). * * Initialize an already allocated GEM object of the specified size with * shmfs backing store. */ -int drm_gem_object_init(struct drm_device *dev, - struct drm_gem_object *obj, size_t size) +int drm_gem_object_init_with_mnt(struct drm_device *dev, + struct drm_gem_object *obj, size_t size, + struct vfsmount *gemfs) { struct file *filp; drm_gem_private_object_init(dev, obj, size); - filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); + if (gemfs) + filp = shmem_file_setup_with_mnt(gemfs, "drm mm object", size, + VM_NORESERVE); + else + filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); + if (IS_ERR(filp)) return PTR_ERR(filp); @@ -137,6 +147,22 @@ int drm_gem_object_init(struct drm_device *dev, return 0; } +EXPORT_SYMBOL(drm_gem_object_init_with_mnt); + +/** + * drm_gem_object_init - initialize an allocated shmem-backed GEM object + * @dev: drm_device the object should be initialized for + * @obj: drm_gem_object to initialize + * @size: object size + * + * Initialize an already allocated GEM object of the specified size with + * shmfs backing store. + */ +int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, + size_t size) +{ + return drm_gem_object_init_with_mnt(dev, obj, size, NULL); +} EXPORT_SYMBOL(drm_gem_object_init); /** diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index bae4865b2101..2ebf6e10cc44 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -472,6 +472,9 @@ void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); +int drm_gem_object_init_with_mnt(struct drm_device *dev, + struct drm_gem_object *obj, size_t size, + struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); -- 2.51.0 From eb8d395f68421449c6201d3019f51011d034f00e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:09 -0300 Subject: [PATCH 09/16] drm/v3d: Introduce gemfs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Create a separate "tmpfs" kernel mount for V3D. This will allow us to move away from the shmemfs `shm_mnt` and gives the flexibility to do things like set our own mount options. Here, the interest is to use "huge=", which should allow us to enable the use of THP for our shmem-backed objects. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-6-mcanal@igalia.com --- drivers/gpu/drm/v3d/Makefile | 3 ++- drivers/gpu/drm/v3d/v3d_drv.h | 9 +++++++ drivers/gpu/drm/v3d/v3d_gem.c | 3 +++ drivers/gpu/drm/v3d/v3d_gemfs.c | 46 +++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/v3d/v3d_gemfs.c diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index b7d673f1153b..fcf710926057 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -13,7 +13,8 @@ v3d-y := \ v3d_trace_points.o \ v3d_sched.o \ v3d_sysfs.o \ - v3d_submit.o + v3d_submit.o \ + v3d_gemfs.o v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 75b4725d49c7..41b09c828848 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -137,6 +137,11 @@ struct v3d_dev { struct drm_mm mm; spinlock_t mm_lock; + /* + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; + struct work_struct overflow_mem_work; struct v3d_bin_job *bin_job; @@ -534,6 +539,10 @@ void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); void v3d_clean_caches(struct v3d_dev *v3d); +/* v3d_gemfs.c */ +void v3d_gemfs_init(struct v3d_dev *v3d); +void v3d_gemfs_fini(struct v3d_dev *v3d); + /* v3d_submit.c */ void v3d_job_cleanup(struct v3d_job *job); void v3d_job_put(struct v3d_job *job); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index b3b76332f2c5..b1e681630ded 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -288,6 +288,8 @@ v3d_gem_init(struct drm_device *dev) v3d_init_hw_state(v3d); v3d_mmu_set_page_table(v3d); + v3d_gemfs_init(v3d); + ret = v3d_sched_init(v3d); if (ret) { drm_mm_takedown(&v3d->mm); @@ -305,6 +307,7 @@ v3d_gem_destroy(struct drm_device *dev) struct v3d_dev *v3d = to_v3d_dev(dev); v3d_sched_fini(v3d); + v3d_gemfs_fini(v3d); /* Waiting for jobs to finish would need to be done before * unregistering V3D. diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c new file mode 100644 index 000000000000..31cf5bd11e39 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2024 Raspberry Pi */ + +#include +#include + +#include "v3d_drv.h" + +void v3d_gemfs_init(struct v3d_dev *v3d) +{ + char huge_opt[] = "huge=within_size"; + struct file_system_type *type; + struct vfsmount *gemfs; + + /* + * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. However, we + * only do so on platforms which benefit from it. + */ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + goto err; + + type = get_fs_type("tmpfs"); + if (!type) + goto err; + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt); + if (IS_ERR(gemfs)) + goto err; + + v3d->gemfs = gemfs; + drm_info(&v3d->drm, "Using Transparent Hugepages\n"); + + return; + +err: + v3d->gemfs = NULL; + drm_notice(&v3d->drm, + "Transparent Hugepage support is recommended for optimal performance on this platform!\n"); +} + +void v3d_gemfs_fini(struct v3d_dev *v3d) +{ + if (v3d->gemfs) + kern_unmount(v3d->gemfs); +} -- 2.51.0 From be431dfec976e553a08883e26d0d0cc2598a8dfa Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:10 -0300 Subject: [PATCH 10/16] drm/gem: Create shmem GEM object in a given mountpoint MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Create a function `drm_gem_shmem_create_with_mnt()`, similar to `drm_gem_shmem_create()`, that has a mountpoint as a argument. This function will create a shmem GEM object in a given tmpfs mountpoint. This function will be useful for drivers that have a special mountpoint with flags enabled. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-7-mcanal@igalia.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 30 ++++++++++++++++++++++---- include/drm/drm_gem_shmem_helper.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 53c003983ad1..8508060a1a95 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -49,7 +49,8 @@ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { }; static struct drm_gem_shmem_object * -__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) +__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, + struct vfsmount *gemfs) { struct drm_gem_shmem_object *shmem; struct drm_gem_object *obj; @@ -76,7 +77,7 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) drm_gem_private_object_init(dev, obj, size); shmem->map_wc = false; /* dma-buf mappings use always writecombine */ } else { - ret = drm_gem_object_init(dev, obj, size); + ret = drm_gem_object_init_with_mnt(dev, obj, size, gemfs); } if (ret) { drm_gem_private_object_fini(obj); @@ -123,10 +124,31 @@ err_free: */ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size) { - return __drm_gem_shmem_create(dev, size, false); + return __drm_gem_shmem_create(dev, size, false, NULL); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); +/** + * drm_gem_shmem_create_with_mnt - Allocate an object with the given size in a + * given mountpoint + * @dev: DRM device + * @size: Size of the object to allocate + * @gemfs: tmpfs mount where the GEM object will be created + * + * This function creates a shmem GEM object in a given tmpfs mountpoint. + * + * Returns: + * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative + * error code on failure. + */ +struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, + size_t size, + struct vfsmount *gemfs) +{ + return __drm_gem_shmem_create(dev, size, false, gemfs); +} +EXPORT_SYMBOL_GPL(drm_gem_shmem_create_with_mnt); + /** * drm_gem_shmem_free - Free resources associated with a shmem GEM object * @shmem: shmem GEM object to free @@ -765,7 +787,7 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, size_t size = PAGE_ALIGN(attach->dmabuf->size); struct drm_gem_shmem_object *shmem; - shmem = __drm_gem_shmem_create(dev, size, true); + shmem = __drm_gem_shmem_create(dev, size, true, NULL); if (IS_ERR(shmem)) return ERR_CAST(shmem); diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index efbc9f27312b..d22e3fb53631 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -97,6 +97,9 @@ struct drm_gem_shmem_object { container_of(obj, struct drm_gem_shmem_object, base) struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); +struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, + size_t size, + struct vfsmount *gemfs); void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); -- 2.51.0 From 8dd6074d4719644b676adf9f07d71292bee28e3a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:11 -0300 Subject: [PATCH 11/16] drm/v3d: Reduce the alignment of the node allocation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently, we are using an alignment of 128 kB to insert a node, which ends up wasting memory as we perform plenty of small BOs allocations (<= 4 kB). We require that allocations are aligned to 128Kb so for any allocation smaller than that, we are wasting the difference. This implies that we cannot effectively use the whole 4 GB address space available for the GPU in the RPi 4. Currently, we can allocate up to 32000 BOs of 4 kB (~140 MB) and 3000 BOs of 400 kB (~1,3 GB). This can be quite limiting for applications that have a high memory requirement, such as vkoverhead [1]. By reducing the page alignment to 4 kB, we can allocate up to 1000000 BOs of 4 kB (~4 GB) and 10000 BOs of 400 kB (~4 GB). Moreover, by performing benchmarks, we were able to attest that reducing the page alignment to 4 kB can provide a general performance improvement in OpenGL applications (e.g. glmark2). Therefore, this patch reduces the alignment of the node allocation to 4 kB, which will allow RPi users to explore the whole 4GB virtual address space provided by the hardware. Also, this patch allow users to fully run vkoverhead in the RPi 4/5, solving the issue reported in [1]. [1] https://github.com/zmike/vkoverhead/issues/14 Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-8-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_bo.c | 2 +- drivers/gpu/drm/v3d/v3d_drv.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index ecb80fd75b1a..427a1c405b19 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -123,7 +123,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) */ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, obj->size >> V3D_MMU_PAGE_SHIFT, - GMP_GRANULARITY >> V3D_MMU_PAGE_SHIFT, 0, 0); + SZ_4K >> V3D_MMU_PAGE_SHIFT, 0, 0); spin_unlock(&v3d->mm_lock); if (ret) return ret; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 41b09c828848..82d6e869dd41 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -19,8 +19,6 @@ struct clk; struct platform_device; struct reset_control; -#define GMP_GRANULARITY (128 * 1024) - #define V3D_MMU_PAGE_SHIFT 12 #define V3D_MAX_QUEUES (V3D_CPU + 1) -- 2.51.0 From e4c17720262f394f6320a2b6e17a128bfdabb37c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:12 -0300 Subject: [PATCH 12/16] drm/v3d: Support Big/Super Pages when writing out PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The V3D MMU also supports 64KB and 1MB pages, called big and super pages, respectively. In order to set a 64KB page or 1MB page in the MMU, we need to make sure that page table entries for all 4KB pages within a big/super page must be correctly configured. In order to create a big/super page, we need a contiguous memory region. That's why we use a separate mountpoint with THP enabled. In order to place the page table entries in the MMU, we iterate over the 16 4KB pages (for big pages) or 256 4KB pages (for super pages) and insert the PTE. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-9-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_mmu.c | 54 ++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 82d6e869dd41..a98543290fa8 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -20,6 +20,7 @@ struct platform_device; struct reset_control; #define V3D_MMU_PAGE_SHIFT 12 +#define V3D_PAGE_FACTOR (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT) #define V3D_MAX_QUEUES (V3D_CPU + 1) diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 5bb7821c0243..0f564fd7160c 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -25,9 +25,16 @@ * superpage bit set. */ #define V3D_PTE_SUPERPAGE BIT(31) +#define V3D_PTE_BIGPAGE BIT(30) #define V3D_PTE_WRITEABLE BIT(29) #define V3D_PTE_VALID BIT(28) +static bool v3d_mmu_is_aligned(u32 page, u32 page_address, size_t alignment) +{ + return IS_ALIGNED(page, alignment >> V3D_MMU_PAGE_SHIFT) && + IS_ALIGNED(page_address, alignment >> V3D_MMU_PAGE_SHIFT); +} + int v3d_mmu_flush_all(struct v3d_dev *v3d) { int ret; @@ -78,19 +85,40 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo) struct drm_gem_shmem_object *shmem_obj = &bo->base; struct v3d_dev *v3d = to_v3d_dev(shmem_obj->base.dev); u32 page = bo->node.start; - u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; - struct sg_dma_page_iter dma_iter; - - for_each_sgtable_dma_page(shmem_obj->sgt, &dma_iter, 0) { - dma_addr_t dma_addr = sg_page_iter_dma_address(&dma_iter); - u32 page_address = dma_addr >> V3D_MMU_PAGE_SHIFT; - u32 pte = page_prot | page_address; - u32 i; - - BUG_ON(page_address + (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT) >= - BIT(24)); - for (i = 0; i < PAGE_SIZE >> V3D_MMU_PAGE_SHIFT; i++) - v3d->pt[page++] = pte + i; + struct scatterlist *sgl; + unsigned int count; + + for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, count) { + dma_addr_t dma_addr = sg_dma_address(sgl); + u32 pfn = dma_addr >> V3D_MMU_PAGE_SHIFT; + unsigned int len = sg_dma_len(sgl); + + while (len > 0) { + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; + u32 page_address = page_prot | pfn; + unsigned int i, page_size; + + BUG_ON(pfn + V3D_PAGE_FACTOR >= BIT(24)); + + if (len >= SZ_1M && + v3d_mmu_is_aligned(page, page_address, SZ_1M)) { + page_size = SZ_1M; + page_address |= V3D_PTE_SUPERPAGE; + } else if (len >= SZ_64K && + v3d_mmu_is_aligned(page, page_address, SZ_64K)) { + page_size = SZ_64K; + page_address |= V3D_PTE_BIGPAGE; + } else { + page_size = SZ_4K; + } + + for (i = 0; i < page_size >> V3D_MMU_PAGE_SHIFT; i++) { + v3d->pt[page++] = page_address + i; + pfn++; + } + + len -= page_size; + } } WARN_ON_ONCE(page - bo->node.start != -- 2.51.0 From 20d69e8905fc36818ab585cb50f6de48fb8f6de3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:13 -0300 Subject: [PATCH 13/16] drm/v3d: Use gemfs/THP in BO creation if available MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Although Big/Super Pages could appear naturally, it would be quite hard to have 1MB or 64KB allocated contiguously naturally. Therefore, we can force the creation of large pages allocated contiguously by using a mountpoint with "huge=within_size" enabled. Therefore, as V3D has a mountpoint with "huge=within_size" (if user has THP enabled), use this mountpoint for BO creation if available. This will allow us to create large pages allocated contiguously and make use of Big/Super Pages. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-10-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_bo.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 427a1c405b19..cd85f997b058 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -107,6 +107,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) struct v3d_dev *v3d = to_v3d_dev(obj->dev); struct v3d_bo *bo = to_v3d_bo(obj); struct sg_table *sgt; + u64 align; int ret; /* So far we pin the BO in the MMU for its lifetime, so use @@ -116,6 +117,15 @@ v3d_bo_create_finish(struct drm_gem_object *obj) if (IS_ERR(sgt)) return PTR_ERR(sgt); + if (!v3d->gemfs) + align = SZ_4K; + else if (obj->size >= SZ_1M) + align = SZ_1M; + else if (obj->size >= SZ_64K) + align = SZ_64K; + else + align = SZ_4K; + spin_lock(&v3d->mm_lock); /* Allocate the object's space in the GPU's page tables. * Inserting PTEs will happen later, but the offset is for the @@ -123,7 +133,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) */ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, obj->size >> V3D_MMU_PAGE_SHIFT, - SZ_4K >> V3D_MMU_PAGE_SHIFT, 0, 0); + align >> V3D_MMU_PAGE_SHIFT, 0, 0); spin_unlock(&v3d->mm_lock); if (ret) return ret; @@ -143,10 +153,17 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, size_t unaligned_size) { struct drm_gem_shmem_object *shmem_obj; + struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_bo *bo; int ret; - shmem_obj = drm_gem_shmem_create(dev, unaligned_size); + /* Let the user opt out of allocating the BOs with THP */ + if (v3d->gemfs) + shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, + v3d->gemfs); + else + shmem_obj = drm_gem_shmem_create(dev, unaligned_size); + if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); -- 2.51.0 From 0df4a13ca8bed29cbc362c99b958fbb8b27b1675 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:14 -0300 Subject: [PATCH 14/16] drm/v3d: Add modparam for turning off Big/Super Pages MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add a modparam for turning off Big/Super Pages to make sure that if an user doesn't want Big/Super Pages enabled, it can disabled it by setting the modparam to false. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-11-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 7 +++++++ drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_gemfs.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index d7ff1f5fa481..8be8c6dd9b35 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -36,6 +36,13 @@ #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 +/* Only expose the `super_pages` modparam if THP is enabled. */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +bool super_pages = true; +module_param_named(super_pages, super_pages, bool, 0400); +MODULE_PARM_DESC(super_pages, "Enable/Disable Super Pages support."); +#endif + static int v3d_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index a98543290fa8..de73eefff9ac 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -539,6 +539,7 @@ void v3d_invalidate_caches(struct v3d_dev *v3d); void v3d_clean_caches(struct v3d_dev *v3d); /* v3d_gemfs.c */ +extern bool super_pages; void v3d_gemfs_init(struct v3d_dev *v3d); void v3d_gemfs_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c index 31cf5bd11e39..4c5e18590a5c 100644 --- a/drivers/gpu/drm/v3d/v3d_gemfs.c +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -20,6 +20,10 @@ void v3d_gemfs_init(struct v3d_dev *v3d) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) goto err; + /* The user doesn't want to enable Super Pages */ + if (!super_pages) + goto err; + type = get_fs_type("tmpfs"); if (!type) goto err; -- 2.51.0 From 9f8e1c93a0d459463819d8bd222196b2655c279f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:15 -0300 Subject: [PATCH 15/16] drm/v3d: Expose Super Pages capability MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add a new V3D parameter to expose the support of Super Pages to userspace. The userspace might want to know this information to apply optimizations that are specific to kernels with Super Pages enabled. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-12-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 3 +++ include/uapi/drm/v3d_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 8be8c6dd9b35..fb35c5c3f1a7 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -104,6 +104,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_MAX_PERF_COUNTERS: args->value = v3d->perfmon_info.max_counters; return 0; + case DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES: + args->value = !!v3d->gemfs; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e..2376c73abca1 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -290,6 +290,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, }; struct drm_v3d_get_param { -- 2.51.0 From f73716fd4550d588a811f11c370e90c303f0829b Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 9 Sep 2024 08:48:20 +0200 Subject: [PATCH 16/16] drm/panthor: Add PANTHOR_GROUP_PRIORITY_REALTIME group priority This adds a new value to drm_panthor_group_priority exposing the realtime priority to userspace. This is required to implement NV_context_priority_realtime in Mesa. v2: - Add Steven Price r-b v3: - Add Boris Brezillon r-b Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-3-mary.guillemard@collabora.com --- drivers/gpu/drm/panthor/panthor_drv.c | 2 +- drivers/gpu/drm/panthor/panthor_sched.c | 2 -- include/uapi/drm/panthor_drm.h | 7 +++++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 0caf9e9a8c45..7b1db2adcb4c 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1041,7 +1041,7 @@ static int group_priority_permit(struct drm_file *file, u8 priority) { /* Ensure that priority is valid */ - if (priority > PANTHOR_GROUP_PRIORITY_HIGH) + if (priority > PANTHOR_GROUP_PRIORITY_REALTIME) return -EINVAL; /* Medium priority and below are always allowed */ diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 42afdf0ddb7e..a9e3b1ccccc7 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -137,8 +137,6 @@ enum panthor_csg_priority { * non-real-time groups. When such a group becomes executable, * it will evict the group with the lowest non-rt priority if * there's no free group slot available. - * - * Currently not exposed to userspace. */ PANTHOR_CSG_PRIORITY_RT, diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 1fd8473548ac..011a555e4674 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -720,6 +720,13 @@ enum drm_panthor_group_priority { * Requires CAP_SYS_NICE or DRM_MASTER. */ PANTHOR_GROUP_PRIORITY_HIGH, + + /** + * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_REALTIME, }; /** -- 2.51.0