drm/vc4: Add support for gamma ramps.

We could possibly save a bit of power by not requesting gamma
conversion when the ramp happens to be 1:1, but at least if all the
CRTCs are off the SRAM will be disabled.

This should fix brightness sliders in a lot of fullscreen games.

Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 355ee4b..231356f 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -49,6 +49,10 @@
 	/* Which HVS channel we're using for our CRTC. */
 	int channel;
 
+	u8 lut_r[256];
+	u8 lut_g[256];
+	u8 lut_b[256];
+
 	struct drm_pending_vblank_event *event;
 };
 
@@ -147,6 +151,46 @@
 	drm_crtc_cleanup(crtc);
 }
 
+static void
+vc4_crtc_lut_load(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	u32 i;
+
+	/* The LUT memory is laid out with each HVS channel in order,
+	 * each of which takes 256 writes for R, 256 for G, then 256
+	 * for B.
+	 */
+	HVS_WRITE(SCALER_GAMADDR,
+		  SCALER_GAMADDR_AUTOINC |
+		  (vc4_crtc->channel * 3 * crtc->gamma_size));
+
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]);
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]);
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);
+}
+
+static void
+vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+		   uint32_t start, uint32_t size)
+{
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	u32 i;
+
+	for (i = start; i < start + size; i++) {
+		vc4_crtc->lut_r[i] = r[i] >> 8;
+		vc4_crtc->lut_g[i] = g[i] >> 8;
+		vc4_crtc->lut_b[i] = b[i] >> 8;
+	}
+
+	vc4_crtc_lut_load(crtc);
+}
+
 static u32 vc4_get_fifo_full_level(u32 format)
 {
 	static const u32 fifo_len_bytes = 64;
@@ -260,8 +304,14 @@
 
 	HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
 		  SCALER_DISPBKGND_AUTOHS |
+		  SCALER_DISPBKGND_GAMMA |
 		  (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
 
+	/* Reload the LUT, since the SRAMs would have been disabled if
+	 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
+	 */
+	vc4_crtc_lut_load(crtc);
+
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc));
 		vc4_crtc_dump_regs(vc4_crtc);
@@ -613,6 +663,7 @@
 	.reset = drm_atomic_helper_crtc_reset,
 	.atomic_duplicate_state = vc4_crtc_duplicate_state,
 	.atomic_destroy_state = vc4_crtc_destroy_state,
+	.gamma_set = vc4_crtc_gamma_set,
 };
 
 static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
@@ -711,6 +762,7 @@
 	primary_plane->crtc = crtc;
 	vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc;
 	vc4_crtc->channel = vc4_crtc->data->hvs_channel;
+	drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
 
 	/* Set up some arbitrary number of planes.  We're not limited
 	 * by a set number of physical registers, just the space in
@@ -751,6 +803,12 @@
 
 	vc4_set_crtc_possible_masks(drm, crtc);
 
+	for (i = 0; i < crtc->gamma_size; i++) {
+		vc4_crtc->lut_r[i] = i;
+		vc4_crtc->lut_g[i] = i;
+		vc4_crtc->lut_b[i] = i;
+	}
+
 	platform_set_drvdata(pdev, vc4_crtc);
 
 	return 0;