8fd1294726a2a0d89054020eb9a8d3b5fdc9d18b
[oweals/openwrt.git] / target / linux / brcm2708 / patches-4.4 / 0281-drm-vc4-Add-support-for-scaling-of-display-planes.patch
1 From 8d513c7a67cce0bf0ef312323753eccbd0f3f71a Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Tue, 20 Oct 2015 16:06:57 +0100
4 Subject: [PATCH 281/423] drm/vc4: Add support for scaling of display planes.
5
6 This implements a simple policy for choosing scaling modes
7 (trapezoidal for decimation, PPF for magnification), and a single PPF
8 filter (Mitchell/Netravali's recommendation).
9
10 Signed-off-by: Eric Anholt <eric@anholt.net>
11 (cherry picked from commit 21af94cf1a4c2d3450ab7fead58e6e2291ab92a9)
12 ---
13  drivers/gpu/drm/vc4/vc4_drv.h   |   4 +
14  drivers/gpu/drm/vc4/vc4_hvs.c   |  84 +++++++++++++
15  drivers/gpu/drm/vc4/vc4_plane.c | 253 +++++++++++++++++++++++++++++++++++++---
16  drivers/gpu/drm/vc4/vc4_regs.h  |  46 ++++++++
17  4 files changed, 374 insertions(+), 13 deletions(-)
18
19 --- a/drivers/gpu/drm/vc4/vc4_drv.h
20 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
21 @@ -156,7 +156,11 @@ struct vc4_hvs {
22          * list.  Units are dwords.
23          */
24         struct drm_mm dlist_mm;
25 +       /* Memory manager for the LBM memory used by HVS scaling. */
26 +       struct drm_mm lbm_mm;
27         spinlock_t mm_lock;
28 +
29 +       struct drm_mm_node mitchell_netravali_filter;
30  };
31  
32  struct vc4_plane {
33 --- a/drivers/gpu/drm/vc4/vc4_hvs.c
34 +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
35 @@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file
36  }
37  #endif
38  
39 +/* The filter kernel is composed of dwords each containing three 9-bit
40 + * signed integers packed next to each other.
41 + */
42 +#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff)
43 +#define VC4_PPF_FILTER_WORD(c0, c1, c2)                                \
44 +       ((((c0) & 0x1ff) << 0) |                                \
45 +        (((c1) & 0x1ff) << 9) |                                \
46 +        (((c2) & 0x1ff) << 18))
47 +
48 +/* The whole filter kernel is arranged as the coefficients 0-16 going
49 + * up, then a pad, then 17-31 going down and reversed within the
50 + * dwords.  This means that a linear phase kernel (where it's
51 + * symmetrical at the boundary between 15 and 16) has the last 5
52 + * dwords matching the first 5, but reversed.
53 + */
54 +#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,    \
55 +                               c9, c10, c11, c12, c13, c14, c15)       \
56 +       {VC4_PPF_FILTER_WORD(c0, c1, c2),                               \
57 +        VC4_PPF_FILTER_WORD(c3, c4, c5),                               \
58 +        VC4_PPF_FILTER_WORD(c6, c7, c8),                               \
59 +        VC4_PPF_FILTER_WORD(c9, c10, c11),                             \
60 +        VC4_PPF_FILTER_WORD(c12, c13, c14),                            \
61 +        VC4_PPF_FILTER_WORD(c15, c15, 0)}
62 +
63 +#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
64 +#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
65 +
66 +/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
67 + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
68 + */
69 +static const u32 mitchell_netravali_1_3_1_3_kernel[] =
70 +       VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
71 +                               50, 82, 119, 155, 187, 213, 227);
72 +
73 +static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
74 +                                       struct drm_mm_node *space,
75 +                                       const u32 *kernel)
76 +{
77 +       int ret, i;
78 +       u32 __iomem *dst_kernel;
79 +
80 +       ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1,
81 +                                0);
82 +       if (ret) {
83 +               DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
84 +                         ret);
85 +               return ret;
86 +       }
87 +
88 +       dst_kernel = hvs->dlist + space->start;
89 +
90 +       for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
91 +               if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS)
92 +                       writel(kernel[i], &dst_kernel[i]);
93 +               else {
94 +                       writel(kernel[VC4_KERNEL_DWORDS - i - 1],
95 +                              &dst_kernel[i]);
96 +               }
97 +       }
98 +
99 +       return 0;
100 +}
101 +
102  static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
103  {
104         struct platform_device *pdev = to_platform_device(dev);
105         struct drm_device *drm = dev_get_drvdata(master);
106         struct vc4_dev *vc4 = drm->dev_private;
107         struct vc4_hvs *hvs = NULL;
108 +       int ret;
109  
110         hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
111         if (!hvs)
112 @@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *d
113                     HVS_BOOTLOADER_DLIST_END,
114                     (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
115  
116 +       /* Set up the HVS LBM memory manager.  We could have some more
117 +        * complicated data structure that allowed reuse of LBM areas
118 +        * between planes when they don't overlap on the screen, but
119 +        * for now we just allocate globally.
120 +        */
121 +       drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
122 +
123 +       /* Upload filter kernels.  We only have the one for now, so we
124 +        * keep it around for the lifetime of the driver.
125 +        */
126 +       ret = vc4_hvs_upload_linear_kernel(hvs,
127 +                                          &hvs->mitchell_netravali_filter,
128 +                                          mitchell_netravali_1_3_1_3_kernel);
129 +       if (ret)
130 +               return ret;
131 +
132         vc4->hvs = hvs;
133         return 0;
134  }
135 @@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device
136         struct drm_device *drm = dev_get_drvdata(master);
137         struct vc4_dev *vc4 = drm->dev_private;
138  
139 +       if (vc4->hvs->mitchell_netravali_filter.allocated)
140 +               drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
141 +
142         drm_mm_takedown(&vc4->hvs->dlist_mm);
143 +       drm_mm_takedown(&vc4->hvs->lbm_mm);
144  
145         vc4->hvs = NULL;
146  }
147 --- a/drivers/gpu/drm/vc4/vc4_plane.c
148 +++ b/drivers/gpu/drm/vc4/vc4_plane.c
149 @@ -24,6 +24,12 @@
150  #include "drm_fb_cma_helper.h"
151  #include "drm_plane_helper.h"
152  
153 +enum vc4_scaling_mode {
154 +       VC4_SCALING_NONE,
155 +       VC4_SCALING_TPZ,
156 +       VC4_SCALING_PPF,
157 +};
158 +
159  struct vc4_plane_state {
160         struct drm_plane_state base;
161         /* System memory copy of the display list for this element, computed
162 @@ -47,13 +53,19 @@ struct vc4_plane_state {
163  
164         /* Clipped coordinates of the plane on the display. */
165         int crtc_x, crtc_y, crtc_w, crtc_h;
166 -       /* Clipped size of the area scanned from in the FB. */
167 -       u32 src_w, src_h;
168 +       /* Clipped area being scanned from in the FB. */
169 +       u32 src_x, src_y, src_w, src_h;
170 +
171 +       enum vc4_scaling_mode x_scaling, y_scaling;
172 +       bool is_unity;
173  
174         /* Offset to start scanning out from the start of the plane's
175          * BO.
176          */
177         u32 offset;
178 +
179 +       /* Our allocation in LBM for temporary storage during scaling. */
180 +       struct drm_mm_node lbm;
181  };
182  
183  static inline struct vc4_plane_state *
184 @@ -106,6 +118,16 @@ static const struct hvs_format *vc4_get_
185         return NULL;
186  }
187  
188 +static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
189 +{
190 +       if (dst > src)
191 +               return VC4_SCALING_PPF;
192 +       else if (dst < src)
193 +               return VC4_SCALING_TPZ;
194 +       else
195 +               return VC4_SCALING_NONE;
196 +}
197 +
198  static bool plane_enabled(struct drm_plane_state *state)
199  {
200         return state->fb && state->crtc;
201 @@ -122,6 +144,8 @@ static struct drm_plane_state *vc4_plane
202         if (!vc4_state)
203                 return NULL;
204  
205 +       memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
206 +
207         __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
208  
209         if (vc4_state->dlist) {
210 @@ -141,8 +165,17 @@ static struct drm_plane_state *vc4_plane
211  static void vc4_plane_destroy_state(struct drm_plane *plane,
212                                     struct drm_plane_state *state)
213  {
214 +       struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
215         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
216  
217 +       if (vc4_state->lbm.allocated) {
218 +               unsigned long irqflags;
219 +
220 +               spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
221 +               drm_mm_remove_node(&vc4_state->lbm);
222 +               spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
223 +       }
224 +
225         kfree(vc4_state->dlist);
226         __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base);
227         kfree(state);
228 @@ -181,23 +214,60 @@ static void vc4_dlist_write(struct vc4_p
229         vc4_state->dlist[vc4_state->dlist_count++] = val;
230  }
231  
232 +/* Returns the scl0/scl1 field based on whether the dimensions need to
233 + * be up/down/non-scaled.
234 + *
235 + * This is a replication of a table from the spec.
236 + */
237 +static u32 vc4_get_scl_field(struct drm_plane_state *state)
238 +{
239 +       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
240 +
241 +       switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
242 +       case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
243 +               return SCALER_CTL0_SCL_H_PPF_V_PPF;
244 +       case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
245 +               return SCALER_CTL0_SCL_H_TPZ_V_PPF;
246 +       case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
247 +               return SCALER_CTL0_SCL_H_PPF_V_TPZ;
248 +       case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
249 +               return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
250 +       case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
251 +               return SCALER_CTL0_SCL_H_PPF_V_NONE;
252 +       case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
253 +               return SCALER_CTL0_SCL_H_NONE_V_PPF;
254 +       case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
255 +               return SCALER_CTL0_SCL_H_NONE_V_TPZ;
256 +       case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
257 +               return SCALER_CTL0_SCL_H_TPZ_V_NONE;
258 +       default:
259 +       case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
260 +               /* The unity case is independently handled by
261 +                * SCALER_CTL0_UNITY.
262 +                */
263 +               return 0;
264 +       }
265 +}
266 +
267  static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
268  {
269 +       struct drm_plane *plane = state->plane;
270         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
271         struct drm_framebuffer *fb = state->fb;
272 +       u32 subpixel_src_mask = (1 << 16) - 1;
273  
274         vc4_state->offset = fb->offsets[0];
275  
276 -       if (state->crtc_w << 16 != state->src_w ||
277 -           state->crtc_h << 16 != state->src_h) {
278 -               /* We don't support scaling yet, which involves
279 -                * allocating the LBM memory for scaling temporary
280 -                * storage, and putting filter kernels in the HVS
281 -                * context.
282 -                */
283 +       /* We don't support subpixel source positioning for scaling. */
284 +       if ((state->src_x & subpixel_src_mask) ||
285 +           (state->src_y & subpixel_src_mask) ||
286 +           (state->src_w & subpixel_src_mask) ||
287 +           (state->src_h & subpixel_src_mask)) {
288                 return -EINVAL;
289         }
290  
291 +       vc4_state->src_x = state->src_x >> 16;
292 +       vc4_state->src_y = state->src_y >> 16;
293         vc4_state->src_w = state->src_w >> 16;
294         vc4_state->src_h = state->src_h >> 16;
295  
296 @@ -206,6 +276,23 @@ static int vc4_plane_setup_clipping_and_
297         vc4_state->crtc_w = state->crtc_w;
298         vc4_state->crtc_h = state->crtc_h;
299  
300 +       vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
301 +                                                   vc4_state->crtc_w);
302 +       vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
303 +                                                   vc4_state->crtc_h);
304 +       vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
305 +                              vc4_state->y_scaling == VC4_SCALING_NONE);
306 +
307 +       /* No configuring scaling on the cursor plane, since it gets
308 +          non-vblank-synced updates, and scaling requires
309 +          LBM changes which have to be vblank-synced.
310 +        */
311 +       if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
312 +               return -EINVAL;
313 +
314 +       /* Clamp the on-screen start x/y to 0.  The hardware doesn't
315 +        * support negative y, and negative x wastes bandwidth.
316 +        */
317         if (vc4_state->crtc_x < 0) {
318                 vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
319                                                            0) *
320 @@ -223,6 +310,87 @@ static int vc4_plane_setup_clipping_and_
321         return 0;
322  }
323  
324 +static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
325 +{
326 +       u32 scale, recip;
327 +
328 +       scale = (1 << 16) * src / dst;
329 +
330 +       /* The specs note that while the reciprocal would be defined
331 +        * as (1<<32)/scale, ~0 is close enough.
332 +        */
333 +       recip = ~0 / scale;
334 +
335 +       vc4_dlist_write(vc4_state,
336 +                       VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
337 +                       VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
338 +       vc4_dlist_write(vc4_state,
339 +                       VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
340 +}
341 +
342 +static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
343 +{
344 +       u32 scale = (1 << 16) * src / dst;
345 +
346 +       vc4_dlist_write(vc4_state,
347 +                       SCALER_PPF_AGC |
348 +                       VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
349 +                       VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
350 +}
351 +
352 +static u32 vc4_lbm_size(struct drm_plane_state *state)
353 +{
354 +       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
355 +       /* This is the worst case number.  One of the two sizes will
356 +        * be used depending on the scaling configuration.
357 +        */
358 +       u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w);
359 +       u32 lbm;
360 +
361 +       if (vc4_state->is_unity)
362 +               return 0;
363 +       else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
364 +               lbm = pix_per_line * 8;
365 +       else {
366 +               /* In special cases, this multiplier might be 12. */
367 +               lbm = pix_per_line * 16;
368 +       }
369 +
370 +       lbm = roundup(lbm, 32);
371 +
372 +       return lbm;
373 +}
374 +
375 +static void vc4_write_scaling_parameters(struct drm_plane_state *state)
376 +{
377 +       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
378 +
379 +       /* Ch0 H-PPF Word 0: Scaling Parameters */
380 +       if (vc4_state->x_scaling == VC4_SCALING_PPF) {
381 +               vc4_write_ppf(vc4_state,
382 +                             vc4_state->src_w, vc4_state->crtc_w);
383 +       }
384 +
385 +       /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
386 +       if (vc4_state->y_scaling == VC4_SCALING_PPF) {
387 +               vc4_write_ppf(vc4_state,
388 +                             vc4_state->src_h, vc4_state->crtc_h);
389 +               vc4_dlist_write(vc4_state, 0xc0c0c0c0);
390 +       }
391 +
392 +       /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
393 +       if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
394 +               vc4_write_tpz(vc4_state,
395 +                             vc4_state->src_w, vc4_state->crtc_w);
396 +       }
397 +
398 +       /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
399 +       if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
400 +               vc4_write_tpz(vc4_state,
401 +                             vc4_state->src_h, vc4_state->crtc_h);
402 +               vc4_dlist_write(vc4_state, 0xc0c0c0c0);
403 +       }
404 +}
405  
406  /* Writes out a full display list for an active plane to the plane's
407   * private dlist state.
408 @@ -230,22 +398,50 @@ static int vc4_plane_setup_clipping_and_
409  static int vc4_plane_mode_set(struct drm_plane *plane,
410                               struct drm_plane_state *state)
411  {
412 +       struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
413         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
414         struct drm_framebuffer *fb = state->fb;
415         struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
416         u32 ctl0_offset = vc4_state->dlist_count;
417         const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
418 +       u32 scl;
419 +       u32 lbm_size;
420 +       unsigned long irqflags;
421         int ret;
422  
423         ret = vc4_plane_setup_clipping_and_scaling(state);
424         if (ret)
425                 return ret;
426  
427 +       /* Allocate the LBM memory that the HVS will use for temporary
428 +        * storage due to our scaling/format conversion.
429 +        */
430 +       lbm_size = vc4_lbm_size(state);
431 +       if (lbm_size) {
432 +               if (!vc4_state->lbm.allocated) {
433 +                       spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
434 +                       ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
435 +                                                &vc4_state->lbm,
436 +                                                lbm_size, 32, 0);
437 +                       spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
438 +               } else {
439 +                       WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
440 +               }
441 +       }
442 +
443 +       if (ret)
444 +               return ret;
445 +
446 +       scl = vc4_get_scl_field(state);
447 +
448 +       /* Control word */
449         vc4_dlist_write(vc4_state,
450                         SCALER_CTL0_VALID |
451                         (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
452                         (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
453 -                       SCALER_CTL0_UNITY);
454 +                       (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
455 +                       VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
456 +                       VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
457  
458         /* Position Word 0: Image Positions and Alpha Value */
459         vc4_state->pos0_offset = vc4_state->dlist_count;
460 @@ -254,9 +450,14 @@ static int vc4_plane_mode_set(struct drm
461                         VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
462                         VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
463  
464 -       /* Position Word 1: Scaled Image Dimensions.
465 -        * Skipped due to SCALER_CTL0_UNITY scaling.
466 -        */
467 +       /* Position Word 1: Scaled Image Dimensions. */
468 +       if (!vc4_state->is_unity) {
469 +               vc4_dlist_write(vc4_state,
470 +                               VC4_SET_FIELD(vc4_state->crtc_w,
471 +                                             SCALER_POS1_SCL_WIDTH) |
472 +                               VC4_SET_FIELD(vc4_state->crtc_h,
473 +                                             SCALER_POS1_SCL_HEIGHT));
474 +       }
475  
476         /* Position Word 2: Source Image Size, Alpha Mode */
477         vc4_state->pos2_offset = vc4_state->dlist_count;
478 @@ -282,6 +483,32 @@ static int vc4_plane_mode_set(struct drm
479         vc4_dlist_write(vc4_state,
480                         VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
481  
482 +       if (!vc4_state->is_unity) {
483 +               /* LBM Base Address. */
484 +               if (vc4_state->y_scaling != VC4_SCALING_NONE)
485 +                       vc4_dlist_write(vc4_state, vc4_state->lbm.start);
486 +
487 +               vc4_write_scaling_parameters(state);
488 +
489 +               /* If any PPF setup was done, then all the kernel
490 +                * pointers get uploaded.
491 +                */
492 +               if (vc4_state->x_scaling == VC4_SCALING_PPF ||
493 +                   vc4_state->y_scaling == VC4_SCALING_PPF) {
494 +                       u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
495 +                                                  SCALER_PPF_KERNEL_OFFSET);
496 +
497 +                       /* HPPF plane 0 */
498 +                       vc4_dlist_write(vc4_state, kernel);
499 +                       /* VPPF plane 0 */
500 +                       vc4_dlist_write(vc4_state, kernel);
501 +                       /* HPPF plane 1 */
502 +                       vc4_dlist_write(vc4_state, kernel);
503 +                       /* VPPF plane 1 */
504 +                       vc4_dlist_write(vc4_state, kernel);
505 +               }
506 +       }
507 +
508         vc4_state->dlist[ctl0_offset] |=
509                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
510  
511 --- a/drivers/gpu/drm/vc4/vc4_regs.h
512 +++ b/drivers/gpu/drm/vc4/vc4_regs.h
513 @@ -536,6 +536,21 @@ enum hvs_pixel_format {
514  #define SCALER_CTL0_ORDER_MASK                 VC4_MASK(14, 13)
515  #define SCALER_CTL0_ORDER_SHIFT                        13
516  
517 +#define SCALER_CTL0_SCL1_MASK                  VC4_MASK(10, 8)
518 +#define SCALER_CTL0_SCL1_SHIFT                 8
519 +
520 +#define SCALER_CTL0_SCL0_MASK                  VC4_MASK(7, 5)
521 +#define SCALER_CTL0_SCL0_SHIFT                 5
522 +
523 +#define SCALER_CTL0_SCL_H_PPF_V_PPF            0
524 +#define SCALER_CTL0_SCL_H_TPZ_V_PPF            1
525 +#define SCALER_CTL0_SCL_H_PPF_V_TPZ            2
526 +#define SCALER_CTL0_SCL_H_TPZ_V_TPZ            3
527 +#define SCALER_CTL0_SCL_H_PPF_V_NONE           4
528 +#define SCALER_CTL0_SCL_H_NONE_V_PPF           5
529 +#define SCALER_CTL0_SCL_H_NONE_V_TPZ           6
530 +#define SCALER_CTL0_SCL_H_TPZ_V_NONE           7
531 +
532  /* Set to indicate no scaling. */
533  #define SCALER_CTL0_UNITY                      BIT(4)
534  
535 @@ -551,6 +566,12 @@ enum hvs_pixel_format {
536  #define SCALER_POS0_START_X_MASK               VC4_MASK(11, 0)
537  #define SCALER_POS0_START_X_SHIFT              0
538  
539 +#define SCALER_POS1_SCL_HEIGHT_MASK            VC4_MASK(27, 16)
540 +#define SCALER_POS1_SCL_HEIGHT_SHIFT           16
541 +
542 +#define SCALER_POS1_SCL_WIDTH_MASK             VC4_MASK(11, 0)
543 +#define SCALER_POS1_SCL_WIDTH_SHIFT            0
544 +
545  #define SCALER_POS2_ALPHA_MODE_MASK            VC4_MASK(31, 30)
546  #define SCALER_POS2_ALPHA_MODE_SHIFT           30
547  #define SCALER_POS2_ALPHA_MODE_PIPELINE                0
548 @@ -564,6 +585,31 @@ enum hvs_pixel_format {
549  #define SCALER_POS2_WIDTH_MASK                 VC4_MASK(11, 0)
550  #define SCALER_POS2_WIDTH_SHIFT                        0
551  
552 +#define SCALER_TPZ0_VERT_RECALC                        BIT(31)
553 +#define SCALER_TPZ0_SCALE_MASK                 VC4_MASK(28, 8)
554 +#define SCALER_TPZ0_SCALE_SHIFT                        8
555 +#define SCALER_TPZ0_IPHASE_MASK                        VC4_MASK(7, 0)
556 +#define SCALER_TPZ0_IPHASE_SHIFT               0
557 +#define SCALER_TPZ1_RECIP_MASK                 VC4_MASK(15, 0)
558 +#define SCALER_TPZ1_RECIP_SHIFT                        0
559 +
560 +/* Skips interpolating coefficients to 64 phases, so just 8 are used.
561 + * Required for nearest neighbor.
562 + */
563 +#define SCALER_PPF_NOINTERP                    BIT(31)
564 + * Replaces the highest valued coefficient with one that makes all 4
565 + * sum to unity.
566 + */
567 +#define SCALER_PPF_AGC                         BIT(30)
568 +#define SCALER_PPF_SCALE_MASK                  VC4_MASK(24, 8)
569 +#define SCALER_PPF_SCALE_SHIFT                 8
570 +#define SCALER_PPF_IPHASE_MASK                 VC4_MASK(6, 0)
571 +#define SCALER_PPF_IPHASE_SHIFT                        0
572 +
573 +#define SCALER_PPF_KERNEL_OFFSET_MASK          VC4_MASK(13, 0)
574 +#define SCALER_PPF_KERNEL_OFFSET_SHIFT         0
575 +#define SCALER_PPF_KERNEL_UNCACHED             BIT(31)
576 +
577  #define SCALER_SRC_PITCH_MASK                  VC4_MASK(15, 0)
578  #define SCALER_SRC_PITCH_SHIFT                 0
579