1 From d9151380fdab4a0f2abcc08d2c344b0eabe48ab9 Mon Sep 17 00:00:00 2001
2 From: James Hughes <JamesH65@users.noreply.github.com>
3 Date: Tue, 14 Nov 2017 15:13:15 +0000
4 Subject: [PATCH] AXI performance monitor driver (#2222)
6 Uses the debugfs I/F to provide access to the AXI
7 bus performance monitors.
9 Requires the new mailbox peripheral access for access
10 to the VPU performance registers, system bus access
11 is done using direct register reads.
13 Signed-off-by: James Hughes <james.hughes@raspberrypi.org>
15 raspberrypi_axi_monitor: suppress warning
17 Suppress the following warning by casting the pointer to a uintptr_t
20 Signed-off-by: Matteo Croce <mcroce@redhat.com>
22 drivers/perf/Kconfig | 8 +
23 drivers/perf/Makefile | 1 +
24 drivers/perf/raspberrypi_axi_monitor.c | 637 +++++++++++++++++++++++++
25 3 files changed, 646 insertions(+)
26 create mode 100644 drivers/perf/raspberrypi_axi_monitor.c
28 --- a/drivers/perf/Kconfig
29 +++ b/drivers/perf/Kconfig
30 @@ -130,6 +130,14 @@ config ARM_SPE_PMU
31 Extension, which provides periodic sampling of operations in
32 the CPU pipeline and reports this via the perf AUX interface.
35 + depends on ARCH_BCM2835
36 + tristate "RaspberryPi AXI Performance monitors"
39 + Say y if you want to use Raspberry Pi AXI performance monitors, m if
40 + you want to build it as a module.
42 source "drivers/perf/hisilicon/Kconfig"
45 --- a/drivers/perf/Makefile
46 +++ b/drivers/perf/Makefile
47 @@ -13,3 +13,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu
48 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
49 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
50 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
51 +obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o
53 +++ b/drivers/perf/raspberrypi_axi_monitor.c
56 + * raspberrypi_axi_monitor.c
58 + * Author: james.hughes@raspberrypi.org
60 + * Raspberry Pi AXI performance counters.
62 + * Copyright (C) 2017 Raspberry Pi Trading Ltd.
64 + * This program is free software; you can redistribute it and/or modify
65 + * it under the terms of the GNU General Public License version 2 as
66 + * published by the Free Software Foundation.
69 +#include <linux/debugfs.h>
70 +#include <linux/devcoredump.h>
71 +#include <linux/device.h>
72 +#include <linux/kthread.h>
73 +#include <linux/module.h>
74 +#include <linux/netdevice.h>
75 +#include <linux/mutex.h>
76 +#include <linux/of.h>
77 +#include <linux/platform_device.h>
79 +#include <soc/bcm2835/raspberrypi-firmware.h>
81 +#define NUM_MONITORS 2
82 +#define NUM_BUS_WATCHERS_PER_MONITOR 3
84 +#define SYSTEM_MONITOR 0
85 +#define VPU_MONITOR 1
88 +#define DEFAULT_SAMPLE_TIME 100
90 +#define NUM_BUS_WATCHER_RESULTS 9
92 +struct bus_watcher_data {
94 + u32 results[NUM_BUS_WATCHER_RESULTS];
110 +struct rpi_axiperf {
111 + struct platform_device *dev;
112 + struct dentry *root_folder;
114 + struct task_struct *monitor_thread;
117 + struct rpi_firmware *firmware;
119 + /* Sample time spent on each bus */
122 + /* Now storage for the per monitor settings and the resulting
123 + * performance figures
126 + /* Bit field of buses we want to monitor */
128 + /* Bit field of buses to filter by */
130 + /* The current buses being monitored on this monitor */
131 + int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
132 + /* The last bus monitored on this monitor */
133 + int last_monitored;
135 + /* Set true if this monitor must use the mailbox interface
136 + * rather than access registers directly.
138 + int use_mailbox_interface;
140 + /* Current result values */
141 + struct bus_watcher_data results[MAX_BUSES];
143 + struct dentry *debugfs_entry;
144 + void __iomem *base_address;
146 + } monitor[NUM_MONITORS];
150 +static struct rpi_axiperf *state;
152 +/* Two monitors, System and VPU, each with the following register sets.
153 + * Each monitor can only monitor one bus at a time, so we time share them,
154 + * giving each bus 100ms (default, settable via debugfs) of time on its
155 + * associated monitor
156 + * Record results from the three Bus watchers per monitor and push them to debugfs
159 +/* general registers */
162 +const int GEN_CTL_ENABLE_BIT = BIT(0);
163 +const int GEN_CTL_RESET_BIT = BIT(1);
165 +/* Bus watcher registers */
166 +const int BW_PITCH = 0x40;
168 +const int BW0_CTRL = 0x40;
169 +const int BW1_CTRL = 0x80;
170 +const int BW2_CTRL = 0xc0;
172 +const int BW_ATRANS_OFFSET = 0x04;
173 +const int BW_ATWAIT_OFFSET = 0x08;
174 +const int BW_AMAX_OFFSET = 0x0c;
175 +const int BW_WTRANS_OFFSET = 0x10;
176 +const int BW_WTWAIT_OFFSET = 0x14;
177 +const int BW_WMAX_OFFSET = 0x18;
178 +const int BW_RTRANS_OFFSET = 0x1c;
179 +const int BW_RTWAIT_OFFSET = 0x20;
180 +const int BW_RMAX_OFFSET = 0x24;
182 +const int BW_CTRL_RESET_BIT = BIT(31);
183 +const int BW_CTRL_ENABLE_BIT = BIT(30);
184 +const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
185 +const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);
187 +const int BW_CTRL_SOURCE_SHIFT = 8;
188 +const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits
189 +const int BW_CTRL_BUS_WATCH_SHIFT;
190 +const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
191 +const int BW_CTRL_BUS_FILTER_SHIFT = 8;
193 +const static char *bus_filter_strings[] = {
228 +const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
230 +const static char *system_bus_string[] = {
249 +const int num_system_buses = ARRAY_SIZE(system_bus_string);
251 +const static char *vpu_bus_string[] = {
270 +const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
272 +const static char *monitor_name[] = {
277 +static inline void write_reg(int monitor, int reg, u32 value)
279 + writel(value, state->monitor[monitor].base_address + reg);
282 +static inline u32 read_reg(int monitor, u32 reg)
284 + return readl(state->monitor[monitor].base_address + reg);
287 +static void read_bus_watcher(int monitor, int watcher, u32 *results)
289 + if (state->monitor[monitor].use_mailbox_interface) {
290 + /* We have 9 results, plus the overheads of start address and
291 + * length, so 11 u32 to define
296 + tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address + watcher
297 + + BW_ATRANS_OFFSET);
298 + tmp[1] = NUM_BUS_WATCHER_RESULTS;
300 + err = rpi_firmware_property(state->firmware,
301 + RPI_FIRMWARE_GET_PERIPH_REG,
304 + if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS)
305 + dev_err_once(&state->dev->dev,
306 + "Failed to read bus watcher");
308 + memcpy(results, &tmp[2],
309 + NUM_BUS_WATCHER_RESULTS * sizeof(u32));
312 + void __iomem *addr = state->monitor[monitor].base_address
313 + + watcher + BW_ATRANS_OFFSET;
314 + for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
315 + *results++ = readl(addr);
319 +static void set_monitor_control(int monitor, u32 set)
321 + if (state->monitor[monitor].use_mailbox_interface) {
322 + u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address +
323 + GEN_CTRL), 1, set};
324 + int err = rpi_firmware_property(state->firmware,
325 + RPI_FIRMWARE_SET_PERIPH_REG,
328 + if (err < 0 || tmp[1] != 1)
329 + dev_err_once(&state->dev->dev,
330 + "Failed to set monitor control");
332 + write_reg(monitor, GEN_CTRL, set);
335 +static void set_bus_watcher_control(int monitor, int watcher, u32 set)
337 + if (state->monitor[monitor].use_mailbox_interface) {
338 + u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address +
340 + int err = rpi_firmware_property(state->firmware,
341 + RPI_FIRMWARE_SET_PERIPH_REG,
343 + if (err < 0 || tmp[1] != 1)
344 + dev_err_once(&state->dev->dev,
345 + "Failed to set bus watcher control");
347 + write_reg(monitor, watcher, set);
350 +static void monitor(struct rpi_axiperf *state)
352 + int monitor, num_buses[NUM_MONITORS];
354 + mutex_lock(&state->lock);
356 + for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
357 + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
359 + /* Anything enabled? */
360 + if (mon->bus_enabled == 0) {
361 + /* No, disable all monitoring for this monitor */
362 + set_monitor_control(monitor, GEN_CTL_RESET_BIT);
366 + /* Find out how many buses we want to monitor, and
367 + * spread our 3 bus watchers over them
369 + num_buses[monitor] = hweight32(mon->bus_enabled);
370 + num_buses[monitor] = min(num_buses[monitor],
371 + NUM_BUS_WATCHERS_PER_MONITOR);
373 + for (i = 0; i < num_buses[monitor]; i++) {
377 + mon->last_monitored++;
378 + mon->last_monitored &= 0xf;
379 + } while ((mon->bus_enabled &
380 + (1 << mon->last_monitored)) == 0);
382 + mon->current_bus[i] = mon->last_monitored;
384 + /* Reset the counters */
385 + set_bus_watcher_control(monitor,
388 + BW_CTRL_RESET_BIT);
390 + bus_control = BW_CTRL_ENABLE_BIT |
391 + mon->current_bus[i];
393 + if (mon->bus_filter) {
395 + BW_CTRL_ENABLE_ID_FILTER_BIT;
397 + ((mon->bus_filter & 0x1f)
398 + << BW_CTRL_BUS_FILTER_SHIFT);
402 + set_bus_watcher_control(monitor,
403 + BW0_CTRL + i*BW_PITCH,
408 + /* start monitoring */
409 + set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
412 + mutex_unlock(&state->lock);
414 + msleep(state->sample_time);
416 + /* Now read the results */
418 + mutex_lock(&state->lock);
419 + for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
420 + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
422 + /* Anything enabled? */
423 + if (mon->bus_enabled == 0) {
424 + /* No, disable all monitoring for this monitor */
425 + set_monitor_control(monitor, 0);
429 + for (i = 0; i < num_buses[monitor]; i++) {
430 + int bus = mon->current_bus[i];
432 + read_bus_watcher(monitor,
433 + BW0_CTRL + i*BW_PITCH,
434 + (u32 *)&mon->results[bus].results);
438 + mutex_unlock(&state->lock);
441 +static int monitor_thread(void *data)
443 + struct rpi_axiperf *state = data;
448 + if (kthread_should_stop())
454 +static ssize_t myreader(struct file *fp, char __user *user_buffer,
455 + size_t count, loff_t *position)
457 +#define INIT_BUFF_SIZE 2048
460 + int idx = (int)(uintptr_t)(fp->private_data);
461 + int num_buses, cnt;
462 + char *string_buffer;
463 + int buff_size = INIT_BUFF_SIZE;
465 + typeof(state->monitor[0]) *mon = &(state->monitor[idx]);
467 + if (idx < 0 || idx > NUM_MONITORS)
470 + num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
472 + string_buffer = kmalloc(buff_size, GFP_KERNEL);
474 + if (!string_buffer) {
475 + dev_err(&state->dev->dev,
476 + "Failed temporary string allocation\n");
482 + mutex_lock(&state->lock);
484 + if (mon->bus_filter) {
485 + int filt = min(mon->bus_filter & 0x1f, num_bus_filters);
487 + cnt = snprintf(p, buff_size,
488 + "\nMonitoring transactions from %s only\n",
489 + bus_filter_strings[filt]);
494 + cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
495 + "======================================================================================================\n");
497 + if (cnt >= buff_size)
503 + for (i = 0; i < num_buses; i++) {
504 + if (mon->bus_enabled & (1 << i)) {
505 +#define DIVIDER (1024)
506 + typeof(mon->results[0]) *res = &(mon->results[i]);
508 + cnt = snprintf(p, buff_size,
509 + "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
510 + idx == SYSTEM_MONITOR ?
511 + system_bus_string[i] :
513 + res->atrans/DIVIDER,
514 + res->atwait/DIVIDER,
516 + res->wtrans/DIVIDER,
517 + res->wtwait/DIVIDER,
519 + res->rtrans/DIVIDER,
520 + res->rtwait/DIVIDER,
523 + if (cnt >= buff_size)
531 + mutex_unlock(&state->lock);
535 + /* did the last string entry exceed our buffer size? i.e. out of string
536 + * buffer space. Null terminate, use what we have.
538 + if (cnt >= buff_size) {
540 + string_buffer[INIT_BUFF_SIZE] = 0;
543 + cnt = simple_read_from_buffer(user_buffer, count, position,
545 + INIT_BUFF_SIZE - buff_size);
547 + kfree(string_buffer);
552 +static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
553 + size_t count, loff_t *position)
555 + int idx = (int)(uintptr_t)(fp->private_data);
557 + if (idx < 0 || idx > NUM_MONITORS)
560 + /* At the moment, this does nothing, but in the future it could be
561 + * used to reset counters etc
566 +static const struct file_operations fops_debug = {
569 + .open = simple_open
572 +static int rpi_axiperf_probe(struct platform_device *pdev)
575 + struct device *dev = &pdev->dev;
576 + struct device_node *np = dev->of_node;
577 + struct device_node *fw_node;
579 + state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL);
583 + /* Get the firmware handle for future rpi-firmware-xxx calls */
584 + fw_node = of_parse_phandle(np, "firmware", 0);
586 + dev_err(dev, "Missing firmware node\n");
590 + state->firmware = rpi_firmware_get(fw_node);
591 + if (!state->firmware)
592 + return -EPROBE_DEFER;
594 + /* Special case for the VPU monitor, we must use the mailbox interface
595 + * as it is not accessible from the ARM address space.
597 + state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
598 + state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
600 + for (i = 0; i < NUM_MONITORS; i++) {
601 + if (state->monitor[i].use_mailbox_interface) {
602 + of_property_read_u32_index(np, "reg", i*2,
603 + (u32 *)(&state->monitor[i].base_address));
605 + struct resource *resource =
606 + platform_get_resource(pdev, IORESOURCE_MEM, i);
608 + state->monitor[i].base_address =
609 + devm_ioremap_resource(&pdev->dev, resource);
612 + if (IS_ERR(state->monitor[i].base_address))
613 + return PTR_ERR(state->monitor[i].base_address);
615 + /* Enable all buses by default */
616 + state->monitor[i].bus_enabled = 0xffff;
620 + platform_set_drvdata(pdev, state);
622 + state->sample_time = DEFAULT_SAMPLE_TIME;
624 + /* Set up all the debugfs stuff */
625 + state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
627 + for (i = 0; i < NUM_MONITORS; i++) {
628 + state->monitor[i].debugfs_entry =
629 + debugfs_create_dir(monitor_name[i], state->root_folder);
630 + if (IS_ERR(state->monitor[i].debugfs_entry))
631 + state->monitor[i].debugfs_entry = NULL;
633 + debugfs_create_file("data", 0444,
634 + state->monitor[i].debugfs_entry,
635 + (void *)(uintptr_t)i, &fops_debug);
636 + debugfs_create_u32("enable", 0644,
637 + state->monitor[i].debugfs_entry,
638 + &state->monitor[i].bus_enabled);
639 + debugfs_create_u32("filter", 0644,
640 + state->monitor[i].debugfs_entry,
641 + &state->monitor[i].bus_filter);
642 + debugfs_create_u32("sample_time", 0644,
643 + state->monitor[i].debugfs_entry,
644 + &state->sample_time);
647 + mutex_init(&state->lock);
649 + state->monitor_thread = kthread_run(monitor_thread, state,
656 +static int rpi_axiperf_remove(struct platform_device *dev)
660 + kthread_stop(state->monitor_thread);
662 + debugfs_remove_recursive(state->root_folder);
663 + state->root_folder = NULL;
668 +static const struct of_device_id rpi_axiperf_match[] = {
670 + .compatible = "brcm,bcm2835-axiperf",
674 +MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
676 +static struct platform_driver rpi_axiperf_driver = {
677 + .probe = rpi_axiperf_probe,
678 + .remove = rpi_axiperf_remove,
680 + .name = "rpi-bcm2835-axiperf",
681 + .of_match_table = of_match_ptr(rpi_axiperf_match),
685 +module_platform_driver(rpi_axiperf_driver);
687 +/* Module information */
688 +MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
689 +MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
690 +MODULE_LICENSE("GPL");