struct vc4_dev *vc4 = to_vc4_dev(drm);
        struct vc4_crtc *vc4_crtc;
        struct drm_crtc *crtc;
-       struct drm_plane *primary_plane, *cursor_plane;
+       struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp;
        const struct of_device_id *match;
-       int ret;
+       int ret, i;
 
        vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL);
        if (!vc4_crtc)
                goto err;
        }
 
-       cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
-       if (IS_ERR(cursor_plane)) {
-               dev_err(dev, "failed to construct cursor plane\n");
-               ret = PTR_ERR(cursor_plane);
-               goto err_primary;
-       }
-
-       drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane,
+       drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
                                  &vc4_crtc_funcs, NULL);
        drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs);
        primary_plane->crtc = crtc;
-       cursor_plane->crtc = crtc;
        vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc;
        vc4_crtc->channel = vc4_crtc->data->hvs_channel;
 
+       /* Set up some arbitrary number of planes.  We're not limited
+        * by a set number of physical registers, just the space in
+        * the HVS (16k) and how small a plane can be (28 bytes).
+        * However, each plane we set up takes up some memory, and
+        * increases the cost of looping over planes, which atomic
+        * modesetting does quite a bit.  As a result, we pick a
+        * modest number of planes to expose, that should hopefully
+        * still cover any sane usecase.
+        */
+       for (i = 0; i < 8; i++) {
+               struct drm_plane *plane =
+                       vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
+
+               if (IS_ERR(plane))
+                       continue;
+
+               plane->possible_crtcs = 1 << drm_crtc_index(crtc);
+       }
+
+       /* Set up the legacy cursor after overlay initialization,
+        * since we overlay planes on the CRTC in the order they were
+        * initialized.
+        */
+       cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
+       if (!IS_ERR(cursor_plane)) {
+               cursor_plane->possible_crtcs = 1 << drm_crtc_index(crtc);
+               cursor_plane->crtc = crtc;
+               crtc->cursor = cursor_plane;
+       }
+
        CRTC_WRITE(PV_INTEN, 0);
        CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
        ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
                               vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc);
        if (ret)
-               goto err_cursor;
+               goto err_destroy_planes;
 
        vc4_set_crtc_possible_masks(drm, crtc);
 
 
        return 0;
 
-err_cursor:
-       cursor_plane->funcs->destroy(cursor_plane);
-err_primary:
-       primary_plane->funcs->destroy(primary_plane);
+err_destroy_planes:
+       list_for_each_entry_safe(destroy_plane, temp,
+                                &drm->mode_config.plane_list, head) {
+               if (destroy_plane->possible_crtcs == 1 << drm_crtc_index(crtc))
+                   destroy_plane->funcs->destroy(destroy_plane);
+       }
 err:
        return ret;
 }