bcm27xx: update 6.1 patches from RPi foundation
[openwrt/staging/xback.git] / target / linux / bcm27xx / patches-6.1 / 950-0012-drm-vc4-Add-support-for-gamma-on-BCM2711.patch
1 From 387b65c16c0d763ee4610675ce61e1072fa6cd72 Mon Sep 17 00:00:00 2001
2 From: Dave Stevenson <dave.stevenson@raspberrypi.com>
3 Date: Tue, 27 Apr 2021 14:24:21 +0200
4 Subject: [PATCH] drm/vc4: Add support for gamma on BCM2711
5
6 BCM2711 changes from a 256 entry lookup table to a 16 point
7 piecewise linear function as the pipeline bitdepth has increased
8 to make a LUT unwieldy.
9
10 Implement a simple conversion from a 256 entry LUT that userspace
11 is likely to expect to 16 evenly spread points in the PWL. This
12 could be improved with curve fitting at a later date.
13
14 Co-developed-by: Juerg Haefliger <juergh@canonical.com>
15 Signed-off-by: Juerg Haefliger <juergh@canonical.com>
16 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
17 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
18 ---
19 drivers/gpu/drm/vc4/vc4_crtc.c | 35 ++++++++++---
20 drivers/gpu/drm/vc4/vc4_drv.h | 28 +++++++++--
21 drivers/gpu/drm/vc4/vc4_hvs.c | 89 ++++++++++++++++++++++++++++++++--
22 drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++++++
23 4 files changed, 162 insertions(+), 12 deletions(-)
24
25 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
26 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
27 @@ -1326,19 +1326,42 @@ int vc4_crtc_init(struct drm_device *drm
28
29 if (!vc4->is_vc5) {
30 drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
31 + } else {
32 + /* This is a lie for hvs5 which uses a 16 point PWL, but it
33 + * allows for something smarter than just 16 linearly spaced
34 + * segments. Conversion is done in vc5_hvs_update_gamma_lut.
35 + */
36 + drm_mode_crtc_set_gamma_size(crtc, 256);
37 + }
38
39 - drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
40 + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
41
42 + if (!vc4->is_vc5) {
43 /* We support CTM, but only for one CRTC at a time. It's therefore
44 * implemented as private driver state in vc4_kms, not here.
45 */
46 drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
47 - }
48
49 - for (i = 0; i < crtc->gamma_size; i++) {
50 - vc4_crtc->lut_r[i] = i;
51 - vc4_crtc->lut_g[i] = i;
52 - vc4_crtc->lut_b[i] = i;
53 + /* Initialize the VC4 gamma LUTs */
54 + for (i = 0; i < crtc->gamma_size; i++) {
55 + vc4_crtc->lut_r[i] = i;
56 + vc4_crtc->lut_g[i] = i;
57 + vc4_crtc->lut_b[i] = i;
58 + }
59 + } else {
60 + /* Initialize the VC5 gamma PWL entries. Assume 12-bit pipeline,
61 + * evenly spread over full range.
62 + */
63 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
64 + vc4_crtc->pwl_r[i] =
65 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
66 + vc4_crtc->pwl_g[i] =
67 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
68 + vc4_crtc->pwl_b[i] =
69 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
70 + vc4_crtc->pwl_a[i] =
71 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
72 + }
73 }
74
75 return 0;
76 --- a/drivers/gpu/drm/vc4/vc4_drv.h
77 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
78 @@ -20,6 +20,7 @@
79 #include <drm/drm_modeset_lock.h>
80
81 #include "uapi/drm/vc4_drm.h"
82 +#include "vc4_regs.h"
83
84 struct drm_device;
85 struct drm_gem_object;
86 @@ -481,6 +482,17 @@ struct vc4_pv_data {
87 enum vc4_encoder_type encoder_types[4];
88 };
89
90 +struct vc5_gamma_entry {
91 + u32 x_c_terms;
92 + u32 grad_term;
93 +};
94 +
95 +#define VC5_HVS_SET_GAMMA_ENTRY(x, c, g) (struct vc5_gamma_entry){ \
96 + .x_c_terms = VC4_SET_FIELD((x), SCALER5_DSPGAMMA_OFF_X) | \
97 + VC4_SET_FIELD((c), SCALER5_DSPGAMMA_OFF_C), \
98 + .grad_term = (g) \
99 +}
100 +
101 struct vc4_crtc {
102 struct drm_crtc base;
103 struct platform_device *pdev;
104 @@ -490,9 +502,19 @@ struct vc4_crtc {
105 /* Timestamp at start of vblank irq - unaffected by lock delays. */
106 ktime_t t_vblank;
107
108 - u8 lut_r[256];
109 - u8 lut_g[256];
110 - u8 lut_b[256];
111 + union {
112 + struct { /* VC4 gamma LUT */
113 + u8 lut_r[256];
114 + u8 lut_g[256];
115 + u8 lut_b[256];
116 + };
117 + struct { /* VC5 gamma PWL entries */
118 + struct vc5_gamma_entry pwl_r[SCALER5_DSPGAMMA_NUM_POINTS];
119 + struct vc5_gamma_entry pwl_g[SCALER5_DSPGAMMA_NUM_POINTS];
120 + struct vc5_gamma_entry pwl_b[SCALER5_DSPGAMMA_NUM_POINTS];
121 + struct vc5_gamma_entry pwl_a[SCALER5_DSPGAMMA_NUM_POINTS];
122 + };
123 + };
124
125 struct drm_pending_vblank_event *event;
126
127 --- a/drivers/gpu/drm/vc4/vc4_hvs.c
128 +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
129 @@ -241,7 +241,8 @@ static void vc4_hvs_lut_load(struct vc4_
130 static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs,
131 struct vc4_crtc *vc4_crtc)
132 {
133 - struct drm_crtc_state *crtc_state = vc4_crtc->base.state;
134 + struct drm_crtc *crtc = &vc4_crtc->base;
135 + struct drm_crtc_state *crtc_state = crtc->state;
136 struct drm_color_lut *lut = crtc_state->gamma_lut->data;
137 u32 length = drm_color_lut_size(crtc_state->gamma_lut);
138 u32 i;
139 @@ -255,6 +256,81 @@ static void vc4_hvs_update_gamma_lut(str
140 vc4_hvs_lut_load(hvs, vc4_crtc);
141 }
142
143 +static void vc5_hvs_write_gamma_entry(struct vc4_hvs *hvs,
144 + u32 offset,
145 + struct vc5_gamma_entry *gamma)
146 +{
147 + HVS_WRITE(offset, gamma->x_c_terms);
148 + HVS_WRITE(offset + 4, gamma->grad_term);
149 +}
150 +
151 +static void vc5_hvs_lut_load(struct vc4_hvs *hvs,
152 + struct vc4_crtc *vc4_crtc)
153 +{
154 + struct drm_crtc *crtc = &vc4_crtc->base;
155 + struct drm_crtc_state *crtc_state = crtc->state;
156 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
157 + u32 i;
158 + u32 offset = SCALER5_DSPGAMMA_START +
159 + vc4_state->assigned_channel * SCALER5_DSPGAMMA_CHAN_OFFSET;
160 +
161 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
162 + vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_r[i]);
163 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
164 + vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_g[i]);
165 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
166 + vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_b[i]);
167 +
168 + if (vc4_state->assigned_channel == 2) {
169 + /* Alpha only valid on channel 2 */
170 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
171 + vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_a[i]);
172 + }
173 +}
174 +
175 +static void vc5_hvs_update_gamma_lut(struct vc4_hvs *hvs,
176 + struct vc4_crtc *vc4_crtc)
177 +{
178 + struct drm_crtc *crtc = &vc4_crtc->base;
179 + struct drm_color_lut *lut = crtc->state->gamma_lut->data;
180 + unsigned int step, i;
181 + u32 start, end;
182 +
183 +#define VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl, chan) \
184 + start = drm_color_lut_extract(lut[i * step].chan, 12); \
185 + end = drm_color_lut_extract(lut[(i + 1) * step - 1].chan, 12); \
186 + \
187 + /* Negative gradients not permitted by the hardware, so \
188 + * flatten such points out. \
189 + */ \
190 + if (end < start) \
191 + end = start; \
192 + \
193 + /* Assume 12bit pipeline. \
194 + * X evenly spread over full range (12 bit). \
195 + * C as U12.4 format. \
196 + * Gradient as U4.8 format. \
197 + */ \
198 + vc4_crtc->pwl[i] = \
199 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, start << 4, \
200 + ((end - start) << 4) / (step - 1))
201 +
202 + /* HVS5 has a 16 point piecewise linear function for each colour
203 + * channel (including alpha on channel 2) on each display channel.
204 + *
205 + * Currently take a crude subsample of the gamma LUT, but this could
206 + * be improved to implement curve fitting.
207 + */
208 + step = crtc->gamma_size / SCALER5_DSPGAMMA_NUM_POINTS;
209 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
210 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_r, red);
211 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_g, green);
212 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_b, blue);
213 + }
214 +
215 + vc5_hvs_lut_load(hvs, vc4_crtc);
216 +}
217 +
218 u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
219 {
220 struct drm_device *drm = &hvs->vc4->base;
221 @@ -398,7 +474,10 @@ static int vc4_hvs_init_channel(struct v
222 /* Reload the LUT, since the SRAMs would have been disabled if
223 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
224 */
225 - vc4_hvs_lut_load(hvs, vc4_crtc);
226 + if (!vc4->is_vc5)
227 + vc4_hvs_lut_load(hvs, vc4_crtc);
228 + else
229 + vc5_hvs_lut_load(hvs, vc4_crtc);
230
231 drm_dev_exit(idx);
232
233 @@ -628,7 +707,11 @@ void vc4_hvs_atomic_flush(struct drm_crt
234 u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel));
235
236 if (crtc->state->gamma_lut) {
237 - vc4_hvs_update_gamma_lut(hvs, vc4_crtc);
238 + if (!vc4->is_vc5)
239 + vc4_hvs_update_gamma_lut(hvs, vc4_crtc);
240 + else
241 + vc5_hvs_update_gamma_lut(hvs, vc4_crtc);
242 +
243 dispbkgndx |= SCALER_DISPBKGND_GAMMA;
244 } else {
245 /* Unsetting DISPBKGND_GAMMA skips the gamma lut step
246 --- a/drivers/gpu/drm/vc4/vc4_regs.h
247 +++ b/drivers/gpu/drm/vc4/vc4_regs.h
248 @@ -512,6 +512,28 @@
249 #define SCALER_DLIST_START 0x00002000
250 #define SCALER_DLIST_SIZE 0x00004000
251
252 +/* Gamma PWL for each channel. 16 points for each of 4 colour channels (alpha
253 + * only on channel 2). 8 bytes per entry, offsets first, then gradient:
254 + * Y = GRAD * X + C
255 + *
256 + * Values for X and C are left justified, and vary depending on the width of
257 + * the HVS channel:
258 + * 8-bit pipeline: X uses [31:24], C is U8.8 format, and GRAD is U4.8.
259 + * 12-bit pipeline: X uses [31:20], C is U12.4 format, and GRAD is U4.8.
260 + *
261 + * The 3 HVS channels start at 0x400 offsets (ie chan 1 starts at 0x2400, and
262 + * chan 2 at 0x2800).
263 + */
264 +#define SCALER5_DSPGAMMA_NUM_POINTS 16
265 +#define SCALER5_DSPGAMMA_START 0x00002000
266 +#define SCALER5_DSPGAMMA_CHAN_OFFSET 0x400
267 +# define SCALER5_DSPGAMMA_OFF_X_MASK VC4_MASK(31, 20)
268 +# define SCALER5_DSPGAMMA_OFF_X_SHIFT 20
269 +# define SCALER5_DSPGAMMA_OFF_C_MASK VC4_MASK(15, 0)
270 +# define SCALER5_DSPGAMMA_OFF_C_SHIFT 0
271 +# define SCALER5_DSPGAMMA_GRAD_MASK VC4_MASK(11, 0)
272 +# define SCALER5_DSPGAMMA_GRAD_SHIFT 0
273 +
274 #define SCALER5_DLIST_START 0x00004000
275
276 # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)