#include <linux/delay.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #define VOP_REG_SET(vop, group, name, v) \
                    vop_reg_set(vop, &vop->data->group->name, 0, ~0, v, #name)
 
+/*
+ * Evaluates to 1 when the register descriptor vop->data->group->name has a
+ * non-zero mask, and to 0 otherwise. A zero mask is taken to mean that this
+ * VOP variant does not define the register.
+ */
+#define VOP_HAS_REG(vop, group, name) \
+               (!!(vop->data->group->name.mask))
+
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
        do { \
                int i, reg = 0, mask = 0; \
        return vop_read_reg(vop, 0, &vop->data->common->dsp_lut_en);
 }
 
+/*
+ * Read the current value of the lut_buffer_index field from the common
+ * register group. The gamma update path samples this before and after
+ * requesting a LUT update and treats a changed value as completion.
+ */
+static u32 vop_lut_buffer_index(struct vop *vop)
+{
+       return vop_read_reg(vop, 0, &vop->data->common->lut_buffer_index);
+}
+
 static void vop_crtc_write_gamma_lut(struct vop *vop, struct drm_crtc *crtc)
 {
        struct drm_color_lut *lut = crtc->state->gamma_lut->data;
-       unsigned int i;
+       unsigned int i, bpc = ilog2(vop->data->lut_size);
 
        for (i = 0; i < crtc->gamma_size; i++) {
                u32 word;
 
-               word = (drm_color_lut_extract(lut[i].red, 10) << 20) |
-                      (drm_color_lut_extract(lut[i].green, 10) << 10) |
-                       drm_color_lut_extract(lut[i].blue, 10);
+               /*
+                * Pack one LUT entry as R:G:B with bpc bits per channel,
+                * red in the topmost field. bpc is ilog2() of the LUT size,
+                * so the field width follows the size of the table instead
+                * of being fixed at 10 bits.
+                */
+               word = (drm_color_lut_extract(lut[i].red, bpc) << (2 * bpc)) |
+                      (drm_color_lut_extract(lut[i].green, bpc) << bpc) |
+                       drm_color_lut_extract(lut[i].blue, bpc);
                writel(word, vop->lut_regs + i * 4);
        }
 }
 {
        struct drm_crtc_state *state = crtc->state;
        unsigned int idle;
+       u32 lut_idx, old_idx;
        int ret;
 
        if (!vop->lut_regs)
                return;
-       /*
-        * To disable gamma (gamma_lut is null) or to write
-        * an update to the LUT, clear dsp_lut_en.
-        */
-       spin_lock(&vop->reg_lock);
-       VOP_REG_SET(vop, common, dsp_lut_en, 0);
-       vop_cfg_done(vop);
-       spin_unlock(&vop->reg_lock);
 
-       /*
-        * In order to write the LUT to the internal memory,
-        * we need to first make sure the dsp_lut_en bit is cleared.
-        */
-       ret = readx_poll_timeout(vop_dsp_lut_is_enabled, vop,
-                                idle, !idle, 5, 30 * 1000);
-       if (ret) {
-               DRM_DEV_ERROR(vop->dev, "display LUT RAM enable timeout!\n");
-               return;
-       }
+       if (!state->gamma_lut || !VOP_HAS_REG(vop, common, update_gamma_lut)) {
+               /*
+                * To disable gamma (gamma_lut is null) or to write
+                * an update to the LUT, clear dsp_lut_en.
+                */
+               spin_lock(&vop->reg_lock);
+               VOP_REG_SET(vop, common, dsp_lut_en, 0);
+               vop_cfg_done(vop);
+               spin_unlock(&vop->reg_lock);
 
-       if (!state->gamma_lut)
-               return;
+               /*
+                * In order to write the LUT to the internal memory,
+                * we need to first make sure the dsp_lut_en bit is cleared.
+                */
+               ret = readx_poll_timeout(vop_dsp_lut_is_enabled, vop,
+                                        idle, !idle, 5, 30 * 1000);
+               if (ret) {
+                       DRM_DEV_ERROR(vop->dev, "display LUT RAM enable timeout!\n");
+                       return;
+               }
+
+               if (!state->gamma_lut)
+                       return;
+       } else {
+               /*
+                * On RK3399 the gamma LUT can be updated without clearing dsp_lut_en,
+                * by setting update_gamma_lut then waiting for lut_buffer_index to change
+                */
+               old_idx = vop_lut_buffer_index(vop);
+       }
 
        spin_lock(&vop->reg_lock);
        vop_crtc_write_gamma_lut(vop, crtc);
        VOP_REG_SET(vop, common, dsp_lut_en, 1);
+       VOP_REG_SET(vop, common, update_gamma_lut, 1);
        vop_cfg_done(vop);
        spin_unlock(&vop->reg_lock);
+
+       if (VOP_HAS_REG(vop, common, update_gamma_lut)) {
+               ret = readx_poll_timeout(vop_lut_buffer_index, vop,
+                                        lut_idx, lut_idx != old_idx, 5, 30 * 1000);
+               if (ret) {
+                       DRM_DEV_ERROR(vop->dev, "gamma LUT update timeout!\n");
+                       return;
+               }
+
+               /*
+                * update_gamma_lut is auto cleared by HW, but write 0 to clear the bit
+                * in our backup of the regs.
+                */
+               spin_lock(&vop->reg_lock);
+               VOP_REG_SET(vop, common, update_gamma_lut, 0);
+               spin_unlock(&vop->reg_lock);
+       }
 }
 
 static void vop_crtc_atomic_begin(struct drm_crtc *crtc,
                return;
        }
 
-       /*
-        * If we have a GAMMA LUT in the state, then let's make sure
-        * it's updated. We might be coming out of suspend,
-        * which means the LUT internal memory needs to be re-written.
-        */
-       if (crtc->state->gamma_lut)
-               vop_crtc_gamma_set(vop, crtc, old_state);
-
        mutex_lock(&vop->vop_lock);
 
        WARN_ON(vop->event);
 
        VOP_REG_SET(vop, common, standby, 0);
        mutex_unlock(&vop->vop_lock);
+
+       /*
+        * If we have a GAMMA LUT in the state, then let's make sure
+        * it's updated. We might be coming out of suspend,
+        * which means the LUT internal memory needs to be re-written.
+        */
+       if (crtc->state->gamma_lut)
+               vop_crtc_gamma_set(vop, crtc, old_state);
 }
 
 static bool vop_fs_irq_is_pending(struct vop *vop)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
        if (res) {
-               if (!vop_data->lut_size) {
-                       DRM_DEV_ERROR(dev, "no gamma LUT size defined\n");
+               if (vop_data->lut_size != 1024 && vop_data->lut_size != 256) {
+                       DRM_DEV_ERROR(dev, "unsupported gamma LUT size %d\n", vop_data->lut_size);
                        return -EINVAL;
                }
                vop->lut_regs = devm_ioremap_resource(dev, res);