brcm2708: update linux 4.4 patches to latest version
[openwrt/staging/lynxis/omap.git] / target / linux / brcm2708 / patches-4.4 / 0284-drm-vc4-Add-support-for-scaling-of-display-planes.patch
diff --git a/target/linux/brcm2708/patches-4.4/0284-drm-vc4-Add-support-for-scaling-of-display-planes.patch b/target/linux/brcm2708/patches-4.4/0284-drm-vc4-Add-support-for-scaling-of-display-planes.patch
deleted file mode 100644 (file)
index b7b985c..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-From 52e3b2b7276aebafa566574945c9d5854215add3 Mon Sep 17 00:00:00 2001
-From: Eric Anholt <eric@anholt.net>
-Date: Tue, 20 Oct 2015 16:06:57 +0100
-Subject: [PATCH 284/381] drm/vc4: Add support for scaling of display planes.
-
-This implements a simple policy for choosing scaling modes
-(trapezoidal for decimation, PPF for magnification), and a single PPF
-filter (Mitchell/Netravali's recommendation).
-
-Signed-off-by: Eric Anholt <eric@anholt.net>
-(cherry picked from commit 21af94cf1a4c2d3450ab7fead58e6e2291ab92a9)
----
- drivers/gpu/drm/vc4/vc4_drv.h   |   4 +
- drivers/gpu/drm/vc4/vc4_hvs.c   |  84 +++++++++++++
- drivers/gpu/drm/vc4/vc4_plane.c | 253 +++++++++++++++++++++++++++++++++++++---
- drivers/gpu/drm/vc4/vc4_regs.h  |  46 ++++++++
- 4 files changed, 374 insertions(+), 13 deletions(-)
-
---- a/drivers/gpu/drm/vc4/vc4_drv.h
-+++ b/drivers/gpu/drm/vc4/vc4_drv.h
-@@ -156,7 +156,11 @@ struct vc4_hvs {
-        * list.  Units are dwords.
-        */
-       struct drm_mm dlist_mm;
-+      /* Memory manager for the LBM memory used by HVS scaling. */
-+      struct drm_mm lbm_mm;
-       spinlock_t mm_lock;
-+
-+      struct drm_mm_node mitchell_netravali_filter;
- };
- struct vc4_plane {
---- a/drivers/gpu/drm/vc4/vc4_hvs.c
-+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
-@@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file
- }
- #endif
-+/* The filter kernel is composed of dwords each containing 3 9-bit
-+ * signed integers packed next to each other.
-+ */
-+#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff)
-+#define VC4_PPF_FILTER_WORD(c0, c1, c2)                               \
-+      ((((c0) & 0x1ff) << 0) |                                \
-+       (((c1) & 0x1ff) << 9) |                                \
-+       (((c2) & 0x1ff) << 18))
-+
-+/* The whole filter kernel is arranged as the coefficients 0-16 going
-+ * up, then a pad, then 17-31 going down and reversed within the
-+ * dwords.  This means that a linear phase kernel (where it's
-+ * symmetrical at the boundary between 15 and 16) has the last 5
-+ * dwords matching the first 5, but reversed.
-+ */
-+#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,   \
-+                              c9, c10, c11, c12, c13, c14, c15)       \
-+      {VC4_PPF_FILTER_WORD(c0, c1, c2),                               \
-+       VC4_PPF_FILTER_WORD(c3, c4, c5),                               \
-+       VC4_PPF_FILTER_WORD(c6, c7, c8),                               \
-+       VC4_PPF_FILTER_WORD(c9, c10, c11),                             \
-+       VC4_PPF_FILTER_WORD(c12, c13, c14),                            \
-+       VC4_PPF_FILTER_WORD(c15, c15, 0)}
-+
-+#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
-+#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
-+
-+/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
-+ * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
-+ */
-+static const u32 mitchell_netravali_1_3_1_3_kernel[] =
-+      VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
-+                              50, 82, 119, 155, 187, 213, 227);
-+
-+static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
-+                                      struct drm_mm_node *space,
-+                                      const u32 *kernel)
-+{
-+      int ret, i;
-+      u32 __iomem *dst_kernel;
-+
-+      ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1,
-+                               0);
-+      if (ret) {
-+              DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
-+                        ret);
-+              return ret;
-+      }
-+
-+      dst_kernel = hvs->dlist + space->start;
-+
-+      for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
-+              if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS)
-+                      writel(kernel[i], &dst_kernel[i]);
-+              else {
-+                      writel(kernel[VC4_KERNEL_DWORDS - i - 1],
-+                             &dst_kernel[i]);
-+              }
-+      }
-+
-+      return 0;
-+}
-+
- static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
- {
-       struct platform_device *pdev = to_platform_device(dev);
-       struct drm_device *drm = dev_get_drvdata(master);
-       struct vc4_dev *vc4 = drm->dev_private;
-       struct vc4_hvs *hvs = NULL;
-+      int ret;
-       hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
-       if (!hvs)
-@@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *d
-                   HVS_BOOTLOADER_DLIST_END,
-                   (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
-+      /* Set up the HVS LBM memory manager.  We could have some more
-+       * complicated data structure that allowed reuse of LBM areas
-+       * between planes when they don't overlap on the screen, but
-+       * for now we just allocate globally.
-+       */
-+      drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
-+
-+      /* Upload filter kernels.  We only have the one for now, so we
-+       * keep it around for the lifetime of the driver.
-+       */
-+      ret = vc4_hvs_upload_linear_kernel(hvs,
-+                                         &hvs->mitchell_netravali_filter,
-+                                         mitchell_netravali_1_3_1_3_kernel);
-+      if (ret)
-+              return ret;
-+
-       vc4->hvs = hvs;
-       return 0;
- }
-@@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device
-       struct drm_device *drm = dev_get_drvdata(master);
-       struct vc4_dev *vc4 = drm->dev_private;
-+      if (vc4->hvs->mitchell_netravali_filter.allocated)
-+              drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
-+
-       drm_mm_takedown(&vc4->hvs->dlist_mm);
-+      drm_mm_takedown(&vc4->hvs->lbm_mm);
-       vc4->hvs = NULL;
- }
---- a/drivers/gpu/drm/vc4/vc4_plane.c
-+++ b/drivers/gpu/drm/vc4/vc4_plane.c
-@@ -24,6 +24,12 @@
- #include "drm_fb_cma_helper.h"
- #include "drm_plane_helper.h"
-+enum vc4_scaling_mode {
-+      VC4_SCALING_NONE,
-+      VC4_SCALING_TPZ,
-+      VC4_SCALING_PPF,
-+};
-+
- struct vc4_plane_state {
-       struct drm_plane_state base;
-       /* System memory copy of the display list for this element, computed
-@@ -47,13 +53,19 @@ struct vc4_plane_state {
-       /* Clipped coordinates of the plane on the display. */
-       int crtc_x, crtc_y, crtc_w, crtc_h;
--      /* Clipped size of the area scanned from in the FB. */
--      u32 src_w, src_h;
-+      /* Clipped area being scanned from in the FB. */
-+      u32 src_x, src_y, src_w, src_h;
-+
-+      enum vc4_scaling_mode x_scaling, y_scaling;
-+      bool is_unity;
-       /* Offset to start scanning out from the start of the plane's
-        * BO.
-        */
-       u32 offset;
-+
-+      /* Our allocation in LBM for temporary storage during scaling. */
-+      struct drm_mm_node lbm;
- };
- static inline struct vc4_plane_state *
-@@ -106,6 +118,16 @@ static const struct hvs_format *vc4_get_
-       return NULL;
- }
-+static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
-+{
-+      if (dst > src)
-+              return VC4_SCALING_PPF;
-+      else if (dst < src)
-+              return VC4_SCALING_TPZ;
-+      else
-+              return VC4_SCALING_NONE;
-+}
-+
- static bool plane_enabled(struct drm_plane_state *state)
- {
-       return state->fb && state->crtc;
-@@ -122,6 +144,8 @@ static struct drm_plane_state *vc4_plane
-       if (!vc4_state)
-               return NULL;
-+      memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
-+
-       __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
-       if (vc4_state->dlist) {
-@@ -141,8 +165,17 @@ static struct drm_plane_state *vc4_plane
- static void vc4_plane_destroy_state(struct drm_plane *plane,
-                                   struct drm_plane_state *state)
- {
-+      struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
-       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-+      if (vc4_state->lbm.allocated) {
-+              unsigned long irqflags;
-+
-+              spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
-+              drm_mm_remove_node(&vc4_state->lbm);
-+              spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
-+      }
-+
-       kfree(vc4_state->dlist);
-       __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base);
-       kfree(state);
-@@ -181,23 +214,60 @@ static void vc4_dlist_write(struct vc4_p
-       vc4_state->dlist[vc4_state->dlist_count++] = val;
- }
-+/* Returns the scl0/scl1 field based on whether the dimensions need to
-+ * be up/down/non-scaled.
-+ *
-+ * This is a replication of a table from the spec.
-+ */
-+static u32 vc4_get_scl_field(struct drm_plane_state *state)
-+{
-+      struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-+
-+      switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
-+      case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
-+              return SCALER_CTL0_SCL_H_PPF_V_PPF;
-+      case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
-+              return SCALER_CTL0_SCL_H_TPZ_V_PPF;
-+      case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
-+              return SCALER_CTL0_SCL_H_PPF_V_TPZ;
-+      case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
-+              return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
-+      case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
-+              return SCALER_CTL0_SCL_H_PPF_V_NONE;
-+      case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
-+              return SCALER_CTL0_SCL_H_NONE_V_PPF;
-+      case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
-+              return SCALER_CTL0_SCL_H_NONE_V_TPZ;
-+      case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
-+              return SCALER_CTL0_SCL_H_TPZ_V_NONE;
-+      default:
-+      case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
-+              /* The unity case is independently handled by
-+               * SCALER_CTL0_UNITY.
-+               */
-+              return 0;
-+      }
-+}
-+
- static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
- {
-+      struct drm_plane *plane = state->plane;
-       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-       struct drm_framebuffer *fb = state->fb;
-+      u32 subpixel_src_mask = (1 << 16) - 1;
-       vc4_state->offset = fb->offsets[0];
--      if (state->crtc_w << 16 != state->src_w ||
--          state->crtc_h << 16 != state->src_h) {
--              /* We don't support scaling yet, which involves
--               * allocating the LBM memory for scaling temporary
--               * storage, and putting filter kernels in the HVS
--               * context.
--               */
-+      /* We don't support subpixel source positioning for scaling. */
-+      if ((state->src_x & subpixel_src_mask) ||
-+          (state->src_y & subpixel_src_mask) ||
-+          (state->src_w & subpixel_src_mask) ||
-+          (state->src_h & subpixel_src_mask)) {
-               return -EINVAL;
-       }
-+      vc4_state->src_x = state->src_x >> 16;
-+      vc4_state->src_y = state->src_y >> 16;
-       vc4_state->src_w = state->src_w >> 16;
-       vc4_state->src_h = state->src_h >> 16;
-@@ -206,6 +276,23 @@ static int vc4_plane_setup_clipping_and_
-       vc4_state->crtc_w = state->crtc_w;
-       vc4_state->crtc_h = state->crtc_h;
-+      vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
-+                                                  vc4_state->crtc_w);
-+      vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
-+                                                  vc4_state->crtc_h);
-+      vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
-+                             vc4_state->y_scaling == VC4_SCALING_NONE);
-+
-+      /* No configuring scaling on the cursor plane, since it gets
-+         non-vblank-synced updates, and scaling requires
-+         LBM changes which have to be vblank-synced.
-+       */
-+      if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
-+              return -EINVAL;
-+
-+      /* Clamp the on-screen start x/y to 0.  The hardware doesn't
-+       * support negative y, and negative x wastes bandwidth.
-+       */
-       if (vc4_state->crtc_x < 0) {
-               vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
-                                                          0) *
-@@ -223,6 +310,87 @@ static int vc4_plane_setup_clipping_and_
-       return 0;
- }
-+static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
-+{
-+      u32 scale, recip;
-+
-+      scale = (1 << 16) * src / dst;
-+
-+      /* The specs note that while the reciprocal would be defined
-+       * as (1<<32)/scale, ~0 is close enough.
-+       */
-+      recip = ~0 / scale;
-+
-+      vc4_dlist_write(vc4_state,
-+                      VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
-+                      VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
-+      vc4_dlist_write(vc4_state,
-+                      VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
-+}
-+
-+static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
-+{
-+      u32 scale = (1 << 16) * src / dst;
-+
-+      vc4_dlist_write(vc4_state,
-+                      SCALER_PPF_AGC |
-+                      VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
-+                      VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
-+}
-+
-+static u32 vc4_lbm_size(struct drm_plane_state *state)
-+{
-+      struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-+      /* This is the worst case number.  One of the two sizes will
-+       * be used depending on the scaling configuration.
-+       */
-+      u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w);
-+      u32 lbm;
-+
-+      if (vc4_state->is_unity)
-+              return 0;
-+      else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
-+              lbm = pix_per_line * 8;
-+      else {
-+              /* In special cases, this multiplier might be 12. */
-+              lbm = pix_per_line * 16;
-+      }
-+
-+      lbm = roundup(lbm, 32);
-+
-+      return lbm;
-+}
-+
-+static void vc4_write_scaling_parameters(struct drm_plane_state *state)
-+{
-+      struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-+
-+      /* Ch0 H-PPF Word 0: Scaling Parameters */
-+      if (vc4_state->x_scaling == VC4_SCALING_PPF) {
-+              vc4_write_ppf(vc4_state,
-+                            vc4_state->src_w, vc4_state->crtc_w);
-+      }
-+
-+      /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
-+      if (vc4_state->y_scaling == VC4_SCALING_PPF) {
-+              vc4_write_ppf(vc4_state,
-+                            vc4_state->src_h, vc4_state->crtc_h);
-+              vc4_dlist_write(vc4_state, 0xc0c0c0c0);
-+      }
-+
-+      /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
-+      if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
-+              vc4_write_tpz(vc4_state,
-+                            vc4_state->src_w, vc4_state->crtc_w);
-+      }
-+
-+      /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
-+      if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
-+              vc4_write_tpz(vc4_state,
-+                            vc4_state->src_h, vc4_state->crtc_h);
-+              vc4_dlist_write(vc4_state, 0xc0c0c0c0);
-+      }
-+}
- /* Writes out a full display list for an active plane to the plane's
-  * private dlist state.
-@@ -230,22 +398,50 @@ static int vc4_plane_setup_clipping_and_
- static int vc4_plane_mode_set(struct drm_plane *plane,
-                             struct drm_plane_state *state)
- {
-+      struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
-       struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-       struct drm_framebuffer *fb = state->fb;
-       struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
-       u32 ctl0_offset = vc4_state->dlist_count;
-       const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
-+      u32 scl;
-+      u32 lbm_size;
-+      unsigned long irqflags;
-       int ret;
-       ret = vc4_plane_setup_clipping_and_scaling(state);
-       if (ret)
-               return ret;
-+      /* Allocate the LBM memory that the HVS will use for temporary
-+       * storage due to our scaling/format conversion.
-+       */
-+      lbm_size = vc4_lbm_size(state);
-+      if (lbm_size) {
-+              if (!vc4_state->lbm.allocated) {
-+                      spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
-+                      ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
-+                                               &vc4_state->lbm,
-+                                               lbm_size, 32, 0);
-+                      spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
-+              } else {
-+                      WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
-+              }
-+      }
-+
-+      if (ret)
-+              return ret;
-+
-+      scl = vc4_get_scl_field(state);
-+
-+      /* Control word */
-       vc4_dlist_write(vc4_state,
-                       SCALER_CTL0_VALID |
-                       (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
-                       (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
--                      SCALER_CTL0_UNITY);
-+                      (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
-+                      VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
-+                      VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
-       /* Position Word 0: Image Positions and Alpha Value */
-       vc4_state->pos0_offset = vc4_state->dlist_count;
-@@ -254,9 +450,14 @@ static int vc4_plane_mode_set(struct drm
-                       VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
-                       VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
--      /* Position Word 1: Scaled Image Dimensions.
--       * Skipped due to SCALER_CTL0_UNITY scaling.
--       */
-+      /* Position Word 1: Scaled Image Dimensions. */
-+      if (!vc4_state->is_unity) {
-+              vc4_dlist_write(vc4_state,
-+                              VC4_SET_FIELD(vc4_state->crtc_w,
-+                                            SCALER_POS1_SCL_WIDTH) |
-+                              VC4_SET_FIELD(vc4_state->crtc_h,
-+                                            SCALER_POS1_SCL_HEIGHT));
-+      }
-       /* Position Word 2: Source Image Size, Alpha Mode */
-       vc4_state->pos2_offset = vc4_state->dlist_count;
-@@ -282,6 +483,32 @@ static int vc4_plane_mode_set(struct drm
-       vc4_dlist_write(vc4_state,
-                       VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
-+      if (!vc4_state->is_unity) {
-+              /* LBM Base Address. */
-+              if (vc4_state->y_scaling != VC4_SCALING_NONE)
-+                      vc4_dlist_write(vc4_state, vc4_state->lbm.start);
-+
-+              vc4_write_scaling_parameters(state);
-+
-+              /* If any PPF setup was done, then all the kernel
-+               * pointers get uploaded.
-+               */
-+              if (vc4_state->x_scaling == VC4_SCALING_PPF ||
-+                  vc4_state->y_scaling == VC4_SCALING_PPF) {
-+                      u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
-+                                                 SCALER_PPF_KERNEL_OFFSET);
-+
-+                      /* HPPF plane 0 */
-+                      vc4_dlist_write(vc4_state, kernel);
-+                      /* VPPF plane 0 */
-+                      vc4_dlist_write(vc4_state, kernel);
-+                      /* HPPF plane 1 */
-+                      vc4_dlist_write(vc4_state, kernel);
-+                      /* VPPF plane 1 */
-+                      vc4_dlist_write(vc4_state, kernel);
-+              }
-+      }
-+
-       vc4_state->dlist[ctl0_offset] |=
-               VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
---- a/drivers/gpu/drm/vc4/vc4_regs.h
-+++ b/drivers/gpu/drm/vc4/vc4_regs.h
-@@ -536,6 +536,21 @@ enum hvs_pixel_format {
- #define SCALER_CTL0_ORDER_MASK                        VC4_MASK(14, 13)
- #define SCALER_CTL0_ORDER_SHIFT                       13
-+#define SCALER_CTL0_SCL1_MASK                 VC4_MASK(10, 8)
-+#define SCALER_CTL0_SCL1_SHIFT                        8
-+
-+#define SCALER_CTL0_SCL0_MASK                 VC4_MASK(7, 5)
-+#define SCALER_CTL0_SCL0_SHIFT                        5
-+
-+#define SCALER_CTL0_SCL_H_PPF_V_PPF           0
-+#define SCALER_CTL0_SCL_H_TPZ_V_PPF           1
-+#define SCALER_CTL0_SCL_H_PPF_V_TPZ           2
-+#define SCALER_CTL0_SCL_H_TPZ_V_TPZ           3
-+#define SCALER_CTL0_SCL_H_PPF_V_NONE          4
-+#define SCALER_CTL0_SCL_H_NONE_V_PPF          5
-+#define SCALER_CTL0_SCL_H_NONE_V_TPZ          6
-+#define SCALER_CTL0_SCL_H_TPZ_V_NONE          7
-+
- /* Set to indicate no scaling. */
- #define SCALER_CTL0_UNITY                     BIT(4)
-@@ -551,6 +566,12 @@ enum hvs_pixel_format {
- #define SCALER_POS0_START_X_MASK              VC4_MASK(11, 0)
- #define SCALER_POS0_START_X_SHIFT             0
-+#define SCALER_POS1_SCL_HEIGHT_MASK           VC4_MASK(27, 16)
-+#define SCALER_POS1_SCL_HEIGHT_SHIFT          16
-+
-+#define SCALER_POS1_SCL_WIDTH_MASK            VC4_MASK(11, 0)
-+#define SCALER_POS1_SCL_WIDTH_SHIFT           0
-+
- #define SCALER_POS2_ALPHA_MODE_MASK           VC4_MASK(31, 30)
- #define SCALER_POS2_ALPHA_MODE_SHIFT          30
- #define SCALER_POS2_ALPHA_MODE_PIPELINE               0
-@@ -564,6 +585,31 @@ enum hvs_pixel_format {
- #define SCALER_POS2_WIDTH_MASK                        VC4_MASK(11, 0)
- #define SCALER_POS2_WIDTH_SHIFT                       0
-+#define SCALER_TPZ0_VERT_RECALC                       BIT(31)
-+#define SCALER_TPZ0_SCALE_MASK                        VC4_MASK(28, 8)
-+#define SCALER_TPZ0_SCALE_SHIFT                       8
-+#define SCALER_TPZ0_IPHASE_MASK                       VC4_MASK(7, 0)
-+#define SCALER_TPZ0_IPHASE_SHIFT              0
-+#define SCALER_TPZ1_RECIP_MASK                        VC4_MASK(15, 0)
-+#define SCALER_TPZ1_RECIP_SHIFT                       0
-+
-+/* Skips interpolating coefficients to 64 phases, so just 8 are used.
-+ * Required for nearest neighbor.
-+ */
-+#define SCALER_PPF_NOINTERP                   BIT(31)
-+/* Replaces the highest valued coefficient with one that makes all 4
-+ * sum to unity.
-+ */
-+#define SCALER_PPF_AGC                                BIT(30)
-+#define SCALER_PPF_SCALE_MASK                 VC4_MASK(24, 8)
-+#define SCALER_PPF_SCALE_SHIFT                        8
-+#define SCALER_PPF_IPHASE_MASK                        VC4_MASK(6, 0)
-+#define SCALER_PPF_IPHASE_SHIFT                       0
-+
-+#define SCALER_PPF_KERNEL_OFFSET_MASK         VC4_MASK(13, 0)
-+#define SCALER_PPF_KERNEL_OFFSET_SHIFT                0
-+#define SCALER_PPF_KERNEL_UNCACHED            BIT(31)
-+
- #define SCALER_SRC_PITCH_MASK                 VC4_MASK(15, 0)
- #define SCALER_SRC_PITCH_SHIFT                        0