brcm2708: organize kernel patches
[openwrt/openwrt.git] / target / linux / brcm2708 / patches-4.19 / 950-0099-AXI-performance-monitor-driver-2222.patch
diff --git a/target/linux/brcm2708/patches-4.19/950-0099-AXI-performance-monitor-driver-2222.patch b/target/linux/brcm2708/patches-4.19/950-0099-AXI-performance-monitor-driver-2222.patch
new file mode 100644 (file)
index 0000000..02de46b
--- /dev/null
@@ -0,0 +1,681 @@
+From b683c668fef086a8c723d55e88364405047d2196 Mon Sep 17 00:00:00 2001
+From: James Hughes <JamesH65@users.noreply.github.com>
+Date: Tue, 14 Nov 2017 15:13:15 +0000
+Subject: [PATCH] AXI performance monitor driver (#2222)
+
+Uses the debugfs I/F to provide access to the AXI
+bus performance monitors.
+
+Requires the new mailbox peripheral access for access
+to the VPU performance registers; system bus access
+is done using direct register reads.
+
+Signed-off-by: James Hughes <james.hughes@raspberrypi.org>
+---
+ drivers/perf/Kconfig                   |   7 +
+ drivers/perf/Makefile                  |   1 +
+ drivers/perf/raspberrypi_axi_monitor.c | 637 +++++++++++++++++++++++++
+ 3 files changed, 645 insertions(+)
+ create mode 100644 drivers/perf/raspberrypi_axi_monitor.c
+
+--- a/drivers/perf/Kconfig
++++ b/drivers/perf/Kconfig
+@@ -102,4 +102,11 @@ config ARM_SPE_PMU
+         Extension, which provides periodic sampling of operations in
+         the CPU pipeline and reports this via the perf AUX interface.
++config RPI_AXIPERF
++        tristate "RaspberryPi AXI Performance monitors"
++        depends on ARCH_BCM2835
++        default n
++        help
++          Say y if you want to use Raspberry Pi AXI performance monitors, m if
++          you want to build it as a module.
+ endmenu
+--- a/drivers/perf/Makefile
++++ b/drivers/perf/Makefile
+@@ -9,3 +9,4 @@ obj-$(CONFIG_QCOM_L2_PMU)      += qcom_l2_pmu
+ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
++obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o
+--- /dev/null
++++ b/drivers/perf/raspberrypi_axi_monitor.c
+@@ -0,0 +1,637 @@
++/*
++ * raspberrypi_axi_monitor.c
++ *
++ * Author: james.hughes@raspberrypi.org
++ *
++ * Raspberry Pi AXI performance counters.
++ *
++ * Copyright (C) 2017 Raspberry Pi Trading Ltd.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/debugfs.h>
++#include <linux/devcoredump.h>
++#include <linux/device.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/netdevice.h>
++#include <linux/mutex.h>
++#include <linux/of.h>
++#include <linux/platform_device.h>
++
++#include <soc/bcm2835/raspberrypi-firmware.h>
++
++#define NUM_MONITORS 2
++#define NUM_BUS_WATCHERS_PER_MONITOR 3
++
++#define SYSTEM_MONITOR 0
++#define VPU_MONITOR 1
++
++#define MAX_BUSES 16
++#define DEFAULT_SAMPLE_TIME 100
++
++#define NUM_BUS_WATCHER_RESULTS 9
++
++struct bus_watcher_data {
++      union   {       /* two views of the same nine counter words */
++              u32 results[NUM_BUS_WATCHER_RESULTS];   /* by index */
++              struct {        /* by name, in the order they are read */
++                      u32 atrans;
++                      u32 atwait;
++                      u32 amax;
++                      u32 wtrans;
++                      u32 wtwait;
++                      u32 wmax;
++                      u32 rtrans;
++                      u32 rtwait;
++                      u32 rmax;
++              };
++      };
++};
++
++
++struct rpi_axiperf {  /* driver state; one instance, reached via 'state' */
++      struct platform_device *dev;
++      struct dentry *root_folder;     /* top-level debugfs directory */
++
++      struct task_struct *monitor_thread;     /* kthread running monitor() */
++      struct mutex lock;      /* taken by monitor() and myreader() */
++
++      struct rpi_firmware *firmware;
++
++      /* Time, as passed to msleep(), that each set of buses is sampled */
++      int sample_time;
++
++      /* Now storage for the per monitor settings and the resulting
++       * performance figures
++       */
++      struct {
++              /* Bit field of buses we want to monitor */
++              int bus_enabled;
++              /* Filter ID (low 5 bits are used); 0 means no filtering */
++              int bus_filter;
++              /* The current buses being monitored on this monitor */
++              int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
++              /* The last bus monitored on this monitor */
++              int last_monitored;
++
++              /* Set true if this monitor must be accessed through the
++               * firmware mailbox interface rather than directly.
++               */
++              int use_mailbox_interface;
++
++              /* Latest result values, indexed by bus number */
++              struct bus_watcher_data results[MAX_BUSES];
++
++              struct dentry *debugfs_entry;
++              void __iomem *base_address;
++
++      }  monitor[NUM_MONITORS];
++
++};
++
++static struct rpi_axiperf *state;
++
++/* Two monitors, System and VPU, each with the following register sets.
++ * Each monitor has three bus watchers but up to 16 buses to cover, so the
++ * watchers are time shared, giving each set of buses 100ms (default,
++ * settable via debugfs) of time on its associated monitor.
++ * Results from the three bus watchers per monitor are published via debugfs.
++ */
++
++/* general registers (byte offsets from a monitor's base address) */
++static const u32 GEN_CTRL             = 0x00;
++
++static const u32 GEN_CTL_ENABLE_BIT   = BIT(0);
++static const u32 GEN_CTL_RESET_BIT    = BIT(1);
++
++/* Bus watcher registers; watcher N's bank starts at BW0_CTRL + N * BW_PITCH */
++static const u32 BW_PITCH             = 0x40;
++
++static const u32 BW0_CTRL             = 0x40;
++static const u32 BW1_CTRL             = 0x80;
++static const u32 BW2_CTRL             = 0xc0;
++
++static const u32 BW_ATRANS_OFFSET     = 0x04;
++static const u32 BW_ATWAIT_OFFSET     = 0x08;
++static const u32 BW_AMAX_OFFSET       = 0x0c;
++static const u32 BW_WTRANS_OFFSET     = 0x10;
++static const u32 BW_WTWAIT_OFFSET     = 0x14;
++static const u32 BW_WMAX_OFFSET       = 0x18;
++static const u32 BW_RTRANS_OFFSET     = 0x1c;
++static const u32 BW_RTWAIT_OFFSET     = 0x20;
++static const u32 BW_RMAX_OFFSET       = 0x24;
++
++static const u32 BW_CTRL_RESET_BIT    = BIT(31);
++static const u32 BW_CTRL_ENABLE_BIT   = BIT(30);
++static const u32 BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
++static const u32 BW_CTRL_LIMIT_HALT_BIT       = BIT(28);
++
++static const u32 BW_CTRL_SOURCE_SHIFT = 8;
++static const u32 BW_CTRL_SOURCE_MASK  = GENMASK(12, 8); /* 5 bits */
++static const u32 BW_CTRL_BUS_WATCH_SHIFT = 0;
++static const u32 BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); /* 6 bits */
++static const u32 BW_CTRL_BUS_FILTER_SHIFT = 8;
++
++static const char * const bus_filter_strings[] = {    /* by filter ID */
++      "",
++      "CORE0_V",
++      "ICACHE0",
++      "DCACHE0",
++      "CORE1_V",
++      "ICACHE1",
++      "DCACHE1",
++      "L2_MAIN",
++      "HOST_PORT",
++      "HOST_PORT2",
++      "HVS",
++      "ISP",
++      "VIDEO_DCT",
++      "VIDEO_SD2AXI",
++      "CAM0",
++      "CAM1",
++      "DMA0",
++      "DMA1",
++      "DMA2_VPU",
++      "JPEG",
++      "VIDEO_CME",
++      "TRANSPOSER",
++      "VIDEO_FME",
++      "CCP2TX",
++      "USB",
++      "V3D0",
++      "V3D1",
++      "V3D2",
++      "AVE",
++      "DEBUG",
++      "CPU",
++      "M30"
++};
++
++static const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
++
++static const char * const system_bus_string[] = {     /* by bus number */
++      "DMA_L2",
++      "TRANS",
++      "JPEG",
++      "SYSTEM_UC",
++      "DMA_UC",
++      "SYSTEM_L2",
++      "CCP2TX",
++      "MPHI_RX",
++      "MPHI_TX",
++      "HVS",
++      "H264",
++      "ISP",
++      "V3D",
++      "PERIPHERAL",
++      "CPU_UC",
++      "CPU_L2"
++};
++
++static const int num_system_buses = ARRAY_SIZE(system_bus_string);
++
++static const char * const vpu_bus_string[] = {        /* by bus number */
++      "VPU1_D_L2",
++      "VPU0_D_L2",
++      "VPU1_I_L2",
++      "VPU0_I_L2",
++      "SYSTEM_L2",
++      "L2_FLUSH",
++      "DMA_L2",
++      "VPU1_D_UC",
++      "VPU0_D_UC",
++      "VPU1_I_UC",
++      "VPU0_I_UC",
++      "SYSTEM_UC",
++      "L2_OUT",
++      "DMA_UC",
++      "SDRAM",
++      "L2_IN"
++};
++
++static const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
++
++static const char * const monitor_name[] = {  /* by monitor index */
++      "System",
++      "VPU"
++};
++/* Direct MMIO write of @value at byte offset @reg from @monitor's base. */
++static inline void write_reg(int monitor, int reg, u32 value)
++{
++      writel(value, state->monitor[monitor].base_address + reg);
++}
++/* Direct MMIO read at byte offset @reg from @monitor's base address. */
++static inline u32 read_reg(int monitor, int reg)
++{
++      return readl(state->monitor[monitor].base_address + reg);
++}
++/* Copy one watcher's nine counters into @results (left untouched on error). */
++static void read_bus_watcher(int monitor, int watcher, u32 *results)
++{
++      if (state->monitor[monitor].use_mailbox_interface) {
++              /* The request is two header words (start address, word
++               * count) followed by room for the 9 results: 11 u32.
++               */
++              u32 tmp[2 + NUM_BUS_WATCHER_RESULTS] = { 0 };
++              int err;
++
++              tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address
++                              + watcher + BW_ATRANS_OFFSET);
++              tmp[1] = NUM_BUS_WATCHER_RESULTS;
++
++              err = rpi_firmware_property(state->firmware,
++                                          RPI_FIRMWARE_GET_PERIPH_REG,
++                                          tmp, sizeof(tmp));
++
++              if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS)
++                      dev_err_once(&state->dev->dev,
++                                   "Failed to read bus watcher\n");
++              else
++                      memcpy(results, &tmp[2],
++                             NUM_BUS_WATCHER_RESULTS * sizeof(u32));
++      } else {
++              int i;
++              void __iomem *addr = state->monitor[monitor].base_address
++                              + watcher + BW_ATRANS_OFFSET;
++              for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
++                      *results++ = readl(addr);
++      }
++}
++/* Write @set to @monitor's general control register (MMIO or mailbox). */
++static void set_monitor_control(int monitor, u32 set)
++{
++      void __iomem *reg = state->monitor[monitor].base_address + GEN_CTRL;
++      u32 tmp[3] = { (u32)(uintptr_t)reg, 1, set };
++
++      if (!state->monitor[monitor].use_mailbox_interface) {
++              writel(set, reg);
++              return;
++      }
++
++      if (rpi_firmware_property(state->firmware, RPI_FIRMWARE_SET_PERIPH_REG,
++                                tmp, sizeof(tmp)) < 0 || tmp[1] != 1)
++              dev_err_once(&state->dev->dev,
++                           "Failed to set monitor control\n");
++}
++/* Write @set to the control register at byte offset @watcher of @monitor. */
++static void set_bus_watcher_control(int monitor, int watcher, u32 set)
++{
++      void __iomem *reg = state->monitor[monitor].base_address + watcher;
++      u32 tmp[3] = { (u32)(uintptr_t)reg, 1, set };
++
++      if (!state->monitor[monitor].use_mailbox_interface) {
++              writel(set, reg);
++              return;
++      }
++      if (rpi_firmware_property(state->firmware, RPI_FIRMWARE_SET_PERIPH_REG,
++                                tmp, sizeof(tmp)) < 0 || tmp[1] != 1)
++              dev_err_once(&state->dev->dev,
++                           "Failed to set bus watcher control\n");
++}
++/* One sampling pass: arm the bus watchers, sleep, then collect results. */
++static void monitor(struct rpi_axiperf *state)
++{
++      int monitor, num_buses[NUM_MONITORS] = { 0 };
++
++      mutex_lock(&state->lock);
++
++      for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
++              typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
++              /* Snapshot once; only bits 0..MAX_BUSES-1 name a bus */
++              u32 enabled = mon->bus_enabled & GENMASK(MAX_BUSES - 1, 0);
++
++              if (!enabled) {
++                      /* Nothing to watch: reset this monitor */
++                      set_monitor_control(monitor, GEN_CTL_RESET_BIT);
++              } else {
++                      int i;
++
++                      /* Find out how many buses we want to monitor, and
++                       * spread our 3 bus watchers over them
++                       */
++                      num_buses[monitor] = min_t(int, hweight32(enabled),
++                                              NUM_BUS_WATCHERS_PER_MONITOR);
++
++                      for (i = 0; i < num_buses[monitor]; i++) {
++                              int bus_control;
++
++                              do {
++                                      mon->last_monitored++;
++                                      mon->last_monitored %= MAX_BUSES;
++                              } while (!(enabled &
++                                         BIT(mon->last_monitored)));
++
++                              mon->current_bus[i] = mon->last_monitored;
++
++                              /* Reset the counters */
++                              set_bus_watcher_control(monitor,
++                                                      BW0_CTRL +
++                                                      i*BW_PITCH,
++                                                      BW_CTRL_RESET_BIT);
++
++                              bus_control = BW_CTRL_ENABLE_BIT |
++                                              mon->current_bus[i];
++
++                              if (mon->bus_filter) {
++                                      bus_control |=
++                                              BW_CTRL_ENABLE_ID_FILTER_BIT;
++                                      bus_control |=
++                                              ((mon->bus_filter & 0x1f)
++                                              << BW_CTRL_BUS_FILTER_SHIFT);
++                              }
++
++                              /* Start capture */
++                              set_bus_watcher_control(monitor,
++                                                      BW0_CTRL + i*BW_PITCH,
++                                                      bus_control);
++                      }
++              }
++
++              /* start monitoring */
++              set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
++      }
++
++      mutex_unlock(&state->lock);
++
++      msleep(state->sample_time);
++
++      /* Now read the results */
++
++      mutex_lock(&state->lock);
++      for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
++              typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
++
++              /* Did the first phase arm any watcher on this monitor? */
++              if (!num_buses[monitor]) {
++                      /* No, disable all monitoring for this monitor */
++                      set_monitor_control(monitor, 0);
++              } else {
++                      int i;
++
++                      for (i = 0; i < num_buses[monitor]; i++) {
++                              int bus = mon->current_bus[i];
++
++                              read_bus_watcher(monitor,
++                                      BW0_CTRL + i*BW_PITCH,
++                                      (u32 *)&mon->results[bus].results);
++                      }
++              }
++      }
++      mutex_unlock(&state->lock);
++}
++
++/* Thread body for the "rpi-axiperfmon" kthread; @data is the driver state. */
++static int monitor_thread(void *data)
++{
++      struct rpi_axiperf *perf = data;
++
++      /* Always run one pass, then repeat until asked to stop */
++      do {
++              monitor(perf);
++      } while (!kthread_should_stop());
++
++      return 0;
++}
++
++/*
++ * debugfs "data" read handler: render one monitor's latest results as a
++ * text table. fp->private_data holds the monitor index, not a pointer.
++ * Returns the number of bytes copied to @user_buffer or a negative errno.
++ */
++static ssize_t myreader(struct file *fp, char __user *user_buffer,
++                      size_t count, loff_t *position)
++{
++#define INIT_BUFF_SIZE 2048
++#define DIVIDER (1024)
++
++      int i;
++      int idx = (int)(uintptr_t)(fp->private_data);
++      int num_buses, cnt;
++      ssize_t ret;
++      char *string_buffer;
++      int buff_size = INIT_BUFF_SIZE;
++      char *p;
++      typeof(state->monitor[0]) *mon;
++
++      /* Validate before indexing; NUM_MONITORS itself is out of range */
++      if (idx < 0 || idx >= NUM_MONITORS)
++              idx = 0;
++
++      mon = &state->monitor[idx];
++      num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
++
++      string_buffer = kmalloc(buff_size, GFP_KERNEL);
++
++      if (!string_buffer) {
++              dev_err(&state->dev->dev,
++                              "Failed temporary string allocation\n");
++              return -ENOMEM;
++      }
++
++      p = string_buffer;
++
++      mutex_lock(&state->lock);
++
++      /* scnprintf() returns the number of characters actually stored, so
++       * p and buff_size can never run past the end of string_buffer even
++       * if the table does not fit; the output is then simply cut short.
++       */
++      if (mon->bus_filter) {
++              int filt = min(mon->bus_filter & 0x1f, num_bus_filters - 1);
++
++              cnt = scnprintf(p, buff_size,
++                              "\nMonitoring transactions from %s only\n",
++                              bus_filter_strings[filt]);
++              p += cnt;
++              buff_size -= cnt;
++      }
++
++      cnt = scnprintf(p, buff_size, "     Bus   |    Atrans    Atwait      AMax    Wtrans    Wtwait      WMax    Rtrans    Rtwait      RMax\n"
++                                    "======================================================================================================\n");
++      p += cnt;
++      buff_size -= cnt;
++
++      for (i = 0; i < num_buses; i++) {
++              typeof(mon->results[0]) *res = &(mon->results[i]);
++
++              if (!(mon->bus_enabled & (1 << i)))
++                      continue;
++
++              /* Counters are reported in units of 1024 */
++              cnt = scnprintf(p, buff_size,
++                              "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
++                              idx == SYSTEM_MONITOR ?
++                                      system_bus_string[i] :
++                                      vpu_bus_string[i],
++                              res->atrans/DIVIDER,
++                              res->atwait/DIVIDER,
++                              res->amax/DIVIDER,
++                              res->wtrans/DIVIDER,
++                              res->wtwait/DIVIDER,
++                              res->wmax/DIVIDER,
++                              res->rtrans/DIVIDER,
++                              res->rtwait/DIVIDER,
++                              res->rmax/DIVIDER);
++              p += cnt;
++              buff_size -= cnt;
++      }
++
++      /* Only exit from the locked region, so the mutex is always released */
++      mutex_unlock(&state->lock);
++
++      /* Everything formatted so far occupies the first
++       * INIT_BUFF_SIZE - buff_size bytes of string_buffer.
++       */
++      ret = simple_read_from_buffer(user_buffer, count, position,
++                                    string_buffer,
++                                    INIT_BUFF_SIZE - buff_size);
++
++      kfree(string_buffer);
++
++      return ret;
++}
++/* debugfs "data" write handler: accepts and discards whatever is written. */
++static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
++                      size_t count, loff_t *position)
++{
++      int idx = (int)(uintptr_t)(fp->private_data);
++
++      if (idx < 0 || idx >= NUM_MONITORS)
++              idx = 0;
++
++      /* At the moment, this does nothing, but in the future it could be
++       * used to reset counters etc
++       */
++      return count;
++}
++/* File operations for each monitor's debugfs "data" file. */
++static const struct file_operations fops_debug = {
++      .read = myreader,
++      .write = mywriter,
++      .open = simple_open
++};
++/* Bind: resolve both monitors, publish debugfs files, start the sampler. */
++static int rpi_axiperf_probe(struct platform_device *pdev)
++{
++      int i;
++      u32 reg;
++      struct device *dev = &pdev->dev;
++      struct device_node *np = dev->of_node;
++      struct device_node *fw_node;
++      struct resource *res;
++      typeof(state->monitor[0]) *mon;
++
++      state = devm_kzalloc(dev, sizeof(*state), GFP_KERNEL);
++      if (!state)
++              return -ENOMEM;
++
++      /* Must be ready before debugfs readers or the kthread can run */
++      mutex_init(&state->lock);
++
++      /* Get the firmware handle for future rpi-firmware-xxx calls */
++      fw_node = of_parse_phandle(np, "firmware", 0);
++      if (!fw_node) {
++              dev_err(dev, "Missing firmware node\n");
++              return -ENOENT;
++      }
++
++      state->firmware = rpi_firmware_get(fw_node);
++      of_node_put(fw_node);
++      if (!state->firmware)
++              return -EPROBE_DEFER;
++
++      /* VPU monitor is not in the ARM address space: mailbox access only */
++      state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
++      state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
++
++      for (i = 0; i < NUM_MONITORS; i++) {
++              mon = &state->monitor[i];
++              if (mon->use_mailbox_interface) {
++                      /* Raw "reg" address, only ever sent to firmware */
++                      if (of_property_read_u32_index(np, "reg", i*2, &reg))
++                              return -EINVAL;
++                      mon->base_address = (void __iomem *)(uintptr_t)reg;
++              } else {
++                      res = platform_get_resource(pdev, IORESOURCE_MEM, i);
++                      mon->base_address = devm_ioremap_resource(dev, res);
++                      if (IS_ERR(mon->base_address))
++                              return PTR_ERR(mon->base_address);
++              }
++
++              /* Enable all buses by default */
++              mon->bus_enabled = 0xffff;
++      }
++
++      state->dev = pdev;
++      platform_set_drvdata(pdev, state);
++      state->sample_time = DEFAULT_SAMPLE_TIME;
++
++      /* Set up all the debugfs stuff */
++      state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
++
++      for (i = 0; i < NUM_MONITORS; i++) {
++              mon = &state->monitor[i];
++              mon->debugfs_entry = debugfs_create_dir(monitor_name[i],
++                                                      state->root_folder);
++              if (IS_ERR(mon->debugfs_entry))
++                      mon->debugfs_entry = NULL;
++
++              debugfs_create_file("data", 0444, mon->debugfs_entry,
++                                  (void *)(uintptr_t)i, &fops_debug);
++              debugfs_create_u32("enable", 0644, mon->debugfs_entry,
++                                 &mon->bus_enabled);
++              debugfs_create_u32("filter", 0644, mon->debugfs_entry,
++                                 &mon->bus_filter);
++              debugfs_create_u32("sample_time", 0644, mon->debugfs_entry,
++                                 &state->sample_time);
++      }
++
++      state->monitor_thread = kthread_run(monitor_thread, state,
++                                          "rpi-axiperfmon");
++      if (IS_ERR(state->monitor_thread)) {
++              debugfs_remove_recursive(state->root_folder);
++              return PTR_ERR(state->monitor_thread);
++      }
++
++      return 0;
++}
++/* Unbind: stop the sampling thread, then tear down the debugfs tree. */
++static int rpi_axiperf_remove(struct platform_device *dev)
++{
++      /* Never hand kthread_stop() a thread that failed to start */
++      if (!IS_ERR_OR_NULL(state->monitor_thread))
++              kthread_stop(state->monitor_thread);
++
++      debugfs_remove_recursive(state->root_folder);
++      state->root_folder = NULL;
++
++      return 0;
++}
++/* Device-tree compatible strings this driver binds to. */
++static const struct of_device_id rpi_axiperf_match[] = {
++      {
++              .compatible = "brcm,bcm2835-axiperf",
++      },
++      {},
++};
++MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
++/* Platform driver glue: probe/remove callbacks plus the OF match table. */
++static struct platform_driver rpi_axiperf_driver  = {
++      .probe =        rpi_axiperf_probe,
++      .remove =       rpi_axiperf_remove,
++      .driver = {
++              .name   = "rpi-bcm2835-axiperf",
++              .of_match_table = of_match_ptr(rpi_axiperf_match),
++      },
++};
++
++module_platform_driver(rpi_axiperf_driver);
++
++/* Module information */
++MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
++MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
++MODULE_LICENSE("GPL");
++