X-Git-Url: http://git.openwrt.org/?a=blobdiff_plain;f=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0097-AXI-performance-monitor-driver-2222.patch;fp=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0097-AXI-performance-monitor-driver-2222.patch;h=02de46bdc2795a48379603a634b45054a46dd01d;hb=c2308a7e4adbb2acc8ff149f91d1ca46801c135e;hp=0000000000000000000000000000000000000000;hpb=67dcc43f3a22dc3a7ac07a7065971b426feeb043;p=openwrt%2Fopenwrt.git diff --git a/target/linux/brcm2708/patches-4.19/950-0097-AXI-performance-monitor-driver-2222.patch b/target/linux/brcm2708/patches-4.19/950-0097-AXI-performance-monitor-driver-2222.patch new file mode 100644 index 0000000000..02de46bdc2 --- /dev/null +++ b/target/linux/brcm2708/patches-4.19/950-0097-AXI-performance-monitor-driver-2222.patch @@ -0,0 +1,681 @@ +From b683c668fef086a8c723d55e88364405047d2196 Mon Sep 17 00:00:00 2001 +From: James Hughes +Date: Tue, 14 Nov 2017 15:13:15 +0000 +Subject: [PATCH] AXI performance monitor driver (#2222) + +Uses the debugfs I/F to provide access to the AXI +bus performance monitors. + +Requires the new mailbox peripheral access for access +to the VPU performance registers, system bus access +is done using direct register reads. + +Signed-off-by: James Hughes +--- + drivers/perf/Kconfig | 7 + + drivers/perf/Makefile | 1 + + drivers/perf/raspberrypi_axi_monitor.c | 637 +++++++++++++++++++++++++ + 3 files changed, 645 insertions(+) + create mode 100644 drivers/perf/raspberrypi_axi_monitor.c + +--- a/drivers/perf/Kconfig ++++ b/drivers/perf/Kconfig +@@ -102,4 +102,11 @@ config ARM_SPE_PMU + Extension, which provides periodic sampling of operations in + the CPU pipeline and reports this via the perf AUX interface. + ++config RPI_AXIPERF ++ depends on ARCH_BCM2835 ++ tristate "RaspberryPi AXI Performance monitors" ++ default n ++ help ++ Say y if you want to use Raspberry Pi AXI performance monitors, m if ++ you want to build it as a module. + endmenu +--- a/drivers/perf/Makefile ++++ b/drivers/perf/Makefile +@@ -9,3 +9,4 @@ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu + obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o + obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o + obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o ++obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o +--- /dev/null ++++ b/drivers/perf/raspberrypi_axi_monitor.c +@@ -0,0 +1,637 @@ ++/* ++ * raspberrypi_axi_monitor.c ++ * ++ * Author: james.hughes@raspberrypi.org ++ * ++ * Raspberry Pi AXI performance counters. ++ * ++ * Copyright (C) 2017 Raspberry Pi Trading Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define NUM_MONITORS 2 ++#define NUM_BUS_WATCHERS_PER_MONITOR 3 ++ ++#define SYSTEM_MONITOR 0 ++#define VPU_MONITOR 1 ++ ++#define MAX_BUSES 16 ++#define DEFAULT_SAMPLE_TIME 100 ++ ++#define NUM_BUS_WATCHER_RESULTS 9 ++ ++struct bus_watcher_data { ++ union { ++ u32 results[NUM_BUS_WATCHER_RESULTS]; ++ struct { ++ u32 atrans; ++ u32 atwait; ++ u32 amax; ++ u32 wtrans; ++ u32 wtwait; ++ u32 wmax; ++ u32 rtrans; ++ u32 rtwait; ++ u32 rmax; ++ }; ++ }; ++}; ++ ++ ++struct rpi_axiperf { ++ struct platform_device *dev; ++ struct dentry *root_folder; ++ ++ struct task_struct *monitor_thread; ++ struct mutex lock; ++ ++ struct rpi_firmware *firmware; ++ ++ /* Sample time spent on for each bus */ ++ int sample_time; ++ ++ /* Now storage for the per monitor settings and the resulting ++ * performance figures ++ */ ++ struct { ++ /* Bit field of buses we want to monitor */ ++ int bus_enabled; ++ /* Bit field of buses to filter by */ ++ int bus_filter; ++ /* The current buses being monitored on this monitor */ ++ int current_bus[NUM_BUS_WATCHERS_PER_MONITOR]; ++ /* The last bus monitored on this monitor */ ++ int last_monitored; ++ ++ /* Set true if this mailbox must use the mailbox interface ++ * rather than access registers directly. ++ */ ++ int use_mailbox_interface; ++ ++ /* Current result values */ ++ struct bus_watcher_data results[MAX_BUSES]; ++ ++ struct dentry *debugfs_entry; ++ void __iomem *base_address; ++ ++ } monitor[NUM_MONITORS]; ++ ++}; ++ ++static struct rpi_axiperf *state; ++ ++/* Two monitors, System and VPU, each with the following register sets. ++ * Each monitor can only monitor one bus at a time, so we time share them, ++ * giving each bus 100ms (default, settable via debugfs) of time on its ++ * associated monitor ++ * Record results from the three Bus watchers per monitor and push to the sysfs ++ */ ++ ++/* general registers */ ++const int GEN_CTRL; ++ ++const int GEN_CTL_ENABLE_BIT = BIT(0); ++const int GEN_CTL_RESET_BIT = BIT(1); ++ ++/* Bus watcher registers */ ++const int BW_PITCH = 0x40; ++ ++const int BW0_CTRL = 0x40; ++const int BW1_CTRL = 0x80; ++const int BW2_CTRL = 0xc0; ++ ++const int BW_ATRANS_OFFSET = 0x04; ++const int BW_ATWAIT_OFFSET = 0x08; ++const int BW_AMAX_OFFSET = 0x0c; ++const int BW_WTRANS_OFFSET = 0x10; ++const int BW_WTWAIT_OFFSET = 0x14; ++const int BW_WMAX_OFFSET = 0x18; ++const int BW_RTRANS_OFFSET = 0x1c; ++const int BW_RTWAIT_OFFSET = 0x20; ++const int BW_RMAX_OFFSET = 0x24; ++ ++const int BW_CTRL_RESET_BIT = BIT(31); ++const int BW_CTRL_ENABLE_BIT = BIT(30); ++const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29); ++const int BW_CTRL_LIMIT_HALT_BIT = BIT(28); ++ ++const int BW_CTRL_SOURCE_SHIFT = 8; ++const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits ++const int BW_CTRL_BUS_WATCH_SHIFT; ++const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits ++const int BW_CTRL_BUS_FILTER_SHIFT = 8; ++ ++const static char *bus_filter_strings[] = { ++ "", ++ "CORE0_V", ++ "ICACHE0", ++ "DCACHE0", ++ "CORE1_V", ++ "ICACHE1", ++ "DCACHE1", ++ "L2_MAIN", ++ "HOST_PORT", ++ "HOST_PORT2", ++ "HVS", ++ "ISP", ++ "VIDEO_DCT", ++ "VIDEO_SD2AXI", ++ "CAM0", ++ "CAM1", ++ "DMA0", ++ "DMA1", ++ "DMA2_VPU", ++ "JPEG", ++ "VIDEO_CME", ++ "TRANSPOSER", ++ "VIDEO_FME", ++ "CCP2TX", ++ "USB", ++ "V3D0", ++ "V3D1", ++ "V3D2", ++ "AVE", ++ "DEBUG", ++ "CPU", ++ "M30" ++}; ++ ++const int num_bus_filters = ARRAY_SIZE(bus_filter_strings); ++ ++const static char *system_bus_string[] = { ++ "DMA_L2", ++ "TRANS", ++ "JPEG", ++ "SYSTEM_UC", ++ "DMA_UC", ++ "SYSTEM_L2", ++ "CCP2TX", ++ "MPHI_RX", ++ "MPHI_TX", ++ "HVS", ++ "H264", ++ "ISP", ++ "V3D", ++ "PERIPHERAL", ++ "CPU_UC", ++ "CPU_L2" ++}; ++ ++const int num_system_buses = ARRAY_SIZE(system_bus_string); ++ ++const static char *vpu_bus_string[] = { ++ "VPU1_D_L2", ++ "VPU0_D_L2", ++ "VPU1_I_L2", ++ "VPU0_I_L2", ++ "SYSTEM_L2", ++ "L2_FLUSH", ++ "DMA_L2", ++ "VPU1_D_UC", ++ "VPU0_D_UC", ++ "VPU1_I_UC", ++ "VPU0_I_UC", ++ "SYSTEM_UC", ++ "L2_OUT", ++ "DMA_UC", ++ "SDRAM", ++ "L2_IN" ++}; ++ ++const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string); ++ ++const static char *monitor_name[] = { ++ "System", ++ "VPU" ++}; ++ ++static inline void write_reg(int monitor, int reg, u32 value) ++{ ++ writel(value, state->monitor[monitor].base_address + reg); ++} ++ ++static inline u32 read_reg(int monitor, u32 reg) ++{ ++ return readl(state->monitor[monitor].base_address + reg); ++} ++ ++static void read_bus_watcher(int monitor, int watcher, u32 *results) ++{ ++ if (state->monitor[monitor].use_mailbox_interface) { ++ /* We have 9 results, plus the overheads of start address and ++ * length So 11 u32 to define ++ */ ++ u32 tmp[11]; ++ int err; ++ ++ tmp[0] = (u32)(state->monitor[monitor].base_address + watcher ++ + BW_ATRANS_OFFSET); ++ tmp[1] = NUM_BUS_WATCHER_RESULTS; ++ ++ err = rpi_firmware_property(state->firmware, ++ RPI_FIRMWARE_GET_PERIPH_REG, ++ tmp, sizeof(tmp)); ++ ++ if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS) ++ dev_err_once(&state->dev->dev, ++ "Failed to read bus watcher"); ++ else ++ memcpy(results, &tmp[2], ++ NUM_BUS_WATCHER_RESULTS * sizeof(u32)); ++ } else { ++ int i; ++ void __iomem *addr = state->monitor[monitor].base_address ++ + watcher + BW_ATRANS_OFFSET; ++ for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4) ++ *results++ = readl(addr); ++ } ++} ++ ++static void set_monitor_control(int monitor, u32 set) ++{ ++ if (state->monitor[monitor].use_mailbox_interface) { ++ u32 tmp[3] = {(u32)(state->monitor[monitor].base_address + ++ GEN_CTRL), 1, set}; ++ int err = rpi_firmware_property(state->firmware, ++ RPI_FIRMWARE_SET_PERIPH_REG, ++ tmp, sizeof(tmp)); ++ ++ if (err < 0 || tmp[1] != 1) ++ dev_err_once(&state->dev->dev, ++ "Failed to set monitor control"); ++ } else ++ write_reg(monitor, GEN_CTRL, set); ++} ++ ++static void set_bus_watcher_control(int monitor, int watcher, u32 set) ++{ ++ if (state->monitor[monitor].use_mailbox_interface) { ++ u32 tmp[3] = {(u32)(state->monitor[monitor].base_address + ++ watcher), 1, set}; ++ int err = rpi_firmware_property(state->firmware, ++ RPI_FIRMWARE_SET_PERIPH_REG, ++ tmp, sizeof(tmp)); ++ if (err < 0 || tmp[1] != 1) ++ dev_err_once(&state->dev->dev, ++ "Failed to set bus watcher control"); ++ } else ++ write_reg(monitor, watcher, set); ++} ++ ++static void monitor(struct rpi_axiperf *state) ++{ ++ int monitor, num_buses[NUM_MONITORS]; ++ ++ mutex_lock(&state->lock); ++ ++ for (monitor = 0; monitor < NUM_MONITORS; monitor++) { ++ typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); ++ ++ /* Anything enabled? */ ++ if (mon->bus_enabled == 0) { ++ /* No, disable all monitoring for this monitor */ ++ set_monitor_control(monitor, GEN_CTL_RESET_BIT); ++ } else { ++ int i; ++ ++ /* Find out how many busses we want to monitor, and ++ * spread our 3 actual monitors over them ++ */ ++ num_buses[monitor] = hweight32(mon->bus_enabled); ++ num_buses[monitor] = min(num_buses[monitor], ++ NUM_BUS_WATCHERS_PER_MONITOR); ++ ++ for (i = 0; i < num_buses[monitor]; i++) { ++ int bus_control; ++ ++ do { ++ mon->last_monitored++; ++ mon->last_monitored &= 0xf; ++ } while ((mon->bus_enabled & ++ (1 << mon->last_monitored)) == 0); ++ ++ mon->current_bus[i] = mon->last_monitored; ++ ++ /* Reset the counters */ ++ set_bus_watcher_control(monitor, ++ BW0_CTRL + ++ i*BW_PITCH, ++ BW_CTRL_RESET_BIT); ++ ++ bus_control = BW_CTRL_ENABLE_BIT | ++ mon->current_bus[i]; ++ ++ if (mon->bus_filter) { ++ bus_control |= ++ BW_CTRL_ENABLE_ID_FILTER_BIT; ++ bus_control |= ++ ((mon->bus_filter & 0x1f) ++ << BW_CTRL_BUS_FILTER_SHIFT); ++ } ++ ++ // Start capture ++ set_bus_watcher_control(monitor, ++ BW0_CTRL + i*BW_PITCH, ++ bus_control); ++ } ++ } ++ ++ /* start monitoring */ ++ set_monitor_control(monitor, GEN_CTL_ENABLE_BIT); ++ } ++ ++ mutex_unlock(&state->lock); ++ ++ msleep(state->sample_time); ++ ++ /* Now read the results */ ++ ++ mutex_lock(&state->lock); ++ for (monitor = 0; monitor < NUM_MONITORS; monitor++) { ++ typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); ++ ++ /* Anything enabled? */ ++ if (mon->bus_enabled == 0) { ++ /* No, disable all monitoring for this monitor */ ++ set_monitor_control(monitor, 0); ++ } else { ++ int i; ++ ++ for (i = 0; i < num_buses[monitor]; i++) { ++ int bus = mon->current_bus[i]; ++ ++ read_bus_watcher(monitor, ++ BW0_CTRL + i*BW_PITCH, ++ (u32 *)&mon->results[bus].results); ++ } ++ } ++ } ++ mutex_unlock(&state->lock); ++} ++ ++static int monitor_thread(void *data) ++{ ++ struct rpi_axiperf *state = data; ++ ++ while (1) { ++ monitor(state); ++ ++ if (kthread_should_stop()) ++ return 0; ++ } ++ return 0; ++} ++ ++static ssize_t myreader(struct file *fp, char __user *user_buffer, ++ size_t count, loff_t *position) ++{ ++#define INIT_BUFF_SIZE 2048 ++ ++ int i; ++ int idx = (int)(fp->private_data); ++ int num_buses, cnt; ++ char *string_buffer; ++ int buff_size = INIT_BUFF_SIZE; ++ char *p; ++ typeof(state->monitor[0]) *mon = &(state->monitor[idx]); ++ ++ if (idx < 0 || idx > NUM_MONITORS) ++ idx = 0; ++ ++ num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses; ++ ++ string_buffer = kmalloc(buff_size, GFP_KERNEL); ++ ++ if (!string_buffer) { ++ dev_err(&state->dev->dev, ++ "Failed temporary string allocation\n"); ++ return 0; ++ } ++ ++ p = string_buffer; ++ ++ mutex_lock(&state->lock); ++ ++ if (mon->bus_filter) { ++ int filt = min(mon->bus_filter & 0x1f, num_bus_filters); ++ ++ cnt = snprintf(p, buff_size, ++ "\nMonitoring transactions from %s only\n", ++ bus_filter_strings[filt]); ++ p += cnt; ++ buff_size -= cnt; ++ } ++ ++ cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n" ++ "======================================================================================================\n"); ++ ++ if (cnt >= buff_size) ++ goto done; ++ ++ p += cnt; ++ buff_size -= cnt; ++ ++ for (i = 0; i < num_buses; i++) { ++ if (mon->bus_enabled & (1 << i)) { ++#define DIVIDER (1024) ++ typeof(mon->results[0]) *res = &(mon->results[i]); ++ ++ cnt = snprintf(p, buff_size, ++ "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n", ++ idx == SYSTEM_MONITOR ? ++ system_bus_string[i] : ++ vpu_bus_string[i], ++ res->atrans/DIVIDER, ++ res->atwait/DIVIDER, ++ res->amax/DIVIDER, ++ res->wtrans/DIVIDER, ++ res->wtwait/DIVIDER, ++ res->wmax/DIVIDER, ++ res->rtrans/DIVIDER, ++ res->rtwait/DIVIDER, ++ res->rmax/DIVIDER ++ ); ++ if (cnt >= buff_size) ++ goto done; ++ ++ p += cnt; ++ buff_size -= cnt; ++ } ++ } ++ ++ mutex_unlock(&state->lock); ++ ++done: ++ ++ /* did the last string entry exceeed our buffer size? ie out of string ++ * buffer space. Null terminate, use what we have. ++ */ ++ if (cnt >= buff_size) { ++ buff_size = 0; ++ string_buffer[INIT_BUFF_SIZE] = 0; ++ } ++ ++ cnt = simple_read_from_buffer(user_buffer, count, position, ++ string_buffer, ++ INIT_BUFF_SIZE - buff_size); ++ ++ kfree(string_buffer); ++ ++ return cnt; ++} ++ ++static ssize_t mywriter(struct file *fp, const char __user *user_buffer, ++ size_t count, loff_t *position) ++{ ++ int idx = (int)(fp->private_data); ++ ++ if (idx < 0 || idx > NUM_MONITORS) ++ idx = 0; ++ ++ /* At the moment, this does nothing, but in the future it could be ++ * used to reset counters etc ++ */ ++ return count; ++} ++ ++static const struct file_operations fops_debug = { ++ .read = myreader, ++ .write = mywriter, ++ .open = simple_open ++}; ++ ++static int rpi_axiperf_probe(struct platform_device *pdev) ++{ ++ int ret = 0, i; ++ struct device *dev = &pdev->dev; ++ struct device_node *np = dev->of_node; ++ struct device_node *fw_node; ++ ++ state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL); ++ if (!state) ++ return -ENOMEM; ++ ++ /* Get the firmware handle for future rpi-firmware-xxx calls */ ++ fw_node = of_parse_phandle(np, "firmware", 0); ++ if (!fw_node) { ++ dev_err(dev, "Missing firmware node\n"); ++ return -ENOENT; ++ } ++ ++ state->firmware = rpi_firmware_get(fw_node); ++ if (!state->firmware) ++ return -EPROBE_DEFER; ++ ++ /* Special case for the VPU monitor, we must use the mailbox interface ++ * as it is not accessible from the ARM address space. ++ */ ++ state->monitor[VPU_MONITOR].use_mailbox_interface = 1; ++ state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0; ++ ++ for (i = 0; i < NUM_MONITORS; i++) { ++ if (state->monitor[i].use_mailbox_interface) { ++ of_property_read_u32_index(np, "reg", i*2, ++ (u32 *)(&state->monitor[i].base_address)); ++ } else { ++ struct resource *resource = ++ platform_get_resource(pdev, IORESOURCE_MEM, i); ++ ++ state->monitor[i].base_address = ++ devm_ioremap_resource(&pdev->dev, resource); ++ } ++ ++ if (IS_ERR(state->monitor[i].base_address)) ++ return PTR_ERR(state->monitor[i].base_address); ++ ++ /* Enable all buses by default */ ++ state->monitor[i].bus_enabled = 0xffff; ++ } ++ ++ state->dev = pdev; ++ platform_set_drvdata(pdev, state); ++ ++ state->sample_time = DEFAULT_SAMPLE_TIME; ++ ++ /* Set up all the debugfs stuff */ ++ state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); ++ ++ for (i = 0; i < NUM_MONITORS; i++) { ++ state->monitor[i].debugfs_entry = ++ debugfs_create_dir(monitor_name[i], state->root_folder); ++ if (IS_ERR(state->monitor[i].debugfs_entry)) ++ state->monitor[i].debugfs_entry = NULL; ++ ++ debugfs_create_file("data", 0444, ++ state->monitor[i].debugfs_entry, ++ (void *)i, &fops_debug); ++ debugfs_create_u32("enable", 0644, ++ state->monitor[i].debugfs_entry, ++ &state->monitor[i].bus_enabled); ++ debugfs_create_u32("filter", 0644, ++ state->monitor[i].debugfs_entry, ++ &state->monitor[i].bus_filter); ++ debugfs_create_u32("sample_time", 0644, ++ state->monitor[i].debugfs_entry, ++ &state->sample_time); ++ } ++ ++ mutex_init(&state->lock); ++ ++ state->monitor_thread = kthread_run(monitor_thread, state, ++ "rpi-axiperfmon"); ++ ++ return ret; ++ ++} ++ ++static int rpi_axiperf_remove(struct platform_device *dev) ++{ ++ int ret = 0; ++ ++ kthread_stop(state->monitor_thread); ++ ++ debugfs_remove_recursive(state->root_folder); ++ state->root_folder = NULL; ++ ++ return ret; ++} ++ ++static const struct of_device_id rpi_axiperf_match[] = { ++ { ++ .compatible = "brcm,bcm2835-axiperf", ++ }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, rpi_axiperf_match); ++ ++static struct platform_driver rpi_axiperf_driver = { ++ .probe = rpi_axiperf_probe, ++ .remove = rpi_axiperf_remove, ++ .driver = { ++ .name = "rpi-bcm2835-axiperf", ++ .of_match_table = of_match_ptr(rpi_axiperf_match), ++ }, ++}; ++ ++module_platform_driver(rpi_axiperf_driver); ++ ++/* Module information */ ++MODULE_AUTHOR("James Hughes "); ++MODULE_DESCRIPTION("RPI AXI Performance monitor driver"); ++MODULE_LICENSE("GPL"); ++