1 From b683c668fef086a8c723d55e88364405047d2196 Mon Sep 17 00:00:00 2001
2 From: James Hughes <JamesH65@users.noreply.github.com>
3 Date: Tue, 14 Nov 2017 15:13:15 +0000
4 Subject: [PATCH] AXI performance monitor driver (#2222)
6 Uses the debugfs I/F to provide access to the AXI
7 bus performance monitors.
9 Requires the new mailbox peripheral access to reach
10 the VPU performance registers; system bus access
11 is done using direct register reads.
13 Signed-off-by: James Hughes <james.hughes@raspberrypi.org>
15 drivers/perf/Kconfig | 7 +
16 drivers/perf/Makefile | 1 +
17 drivers/perf/raspberrypi_axi_monitor.c | 637 +++++++++++++++++++++++++
18 3 files changed, 645 insertions(+)
19 create mode 100644 drivers/perf/raspberrypi_axi_monitor.c
21 --- a/drivers/perf/Kconfig
22 +++ b/drivers/perf/Kconfig
23 @@ -102,4 +102,11 @@ config ARM_SPE_PMU
24 Extension, which provides periodic sampling of operations in
25 the CPU pipeline and reports this via the perf AUX interface.
28 + depends on ARCH_BCM2835
29 + tristate "RaspberryPi AXI Performance monitors"
32 + Say y if you want to use Raspberry Pi AXI performance monitors, m if
33 + you want to build it as a module.
35 --- a/drivers/perf/Makefile
36 +++ b/drivers/perf/Makefile
37 @@ -9,3 +9,4 @@ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu
38 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
39 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
40 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
41 +obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o
43 +++ b/drivers/perf/raspberrypi_axi_monitor.c
46 + * raspberrypi_axi_monitor.c
48 + * Author: james.hughes@raspberrypi.org
50 + * Raspberry Pi AXI performance counters.
52 + * Copyright (C) 2017 Raspberry Pi Trading Ltd.
54 + * This program is free software; you can redistribute it and/or modify
55 + * it under the terms of the GNU General Public License version 2 as
56 + * published by the Free Software Foundation.
59 +#include <linux/debugfs.h>
60 +#include <linux/devcoredump.h>
61 +#include <linux/device.h>
62 +#include <linux/kthread.h>
63 +#include <linux/module.h>
64 +#include <linux/netdevice.h>
65 +#include <linux/mutex.h>
66 +#include <linux/of.h>
67 +#include <linux/platform_device.h>
69 +#include <soc/bcm2835/raspberrypi-firmware.h>
71 +#define NUM_MONITORS 2
72 +#define NUM_BUS_WATCHERS_PER_MONITOR 3
74 +#define SYSTEM_MONITOR 0
75 +#define VPU_MONITOR 1
78 +#define DEFAULT_SAMPLE_TIME 100
80 +#define NUM_BUS_WATCHER_RESULTS 9
82 +struct bus_watcher_data {
84 + u32 results[NUM_BUS_WATCHER_RESULTS];
100 +struct rpi_axiperf {
101 + struct platform_device *dev;
102 + struct dentry *root_folder;
104 + struct task_struct *monitor_thread;
107 + struct rpi_firmware *firmware;
109 + /* Sample time spent on each bus */
112 + /* Now storage for the per monitor settings and the resulting
113 + * performance figures
116 + /* Bit field of buses we want to monitor */
118 + /* Bit field of buses to filter by */
120 + /* The current buses being monitored on this monitor */
121 + int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
122 + /* The last bus monitored on this monitor */
123 + int last_monitored;
125 + /* Set true if this monitor must use the mailbox interface
126 + * rather than access registers directly.
128 + int use_mailbox_interface;
130 + /* Current result values */
131 + struct bus_watcher_data results[MAX_BUSES];
133 + struct dentry *debugfs_entry;
134 + void __iomem *base_address;
136 + } monitor[NUM_MONITORS];
140 +static struct rpi_axiperf *state;
142 +/* Two monitors, System and VPU, each with the following register sets.
143 + * Each monitor can only monitor one bus at a time, so we time share them,
144 + * giving each bus 100ms (default, settable via debugfs) of time on its
145 + * associated monitor
146 + * Record results from the three bus watchers per monitor and push to debugfs
149 +/* general registers */
152 +const int GEN_CTL_ENABLE_BIT = BIT(0);
153 +const int GEN_CTL_RESET_BIT = BIT(1);
155 +/* Bus watcher registers */
156 +const int BW_PITCH = 0x40;
158 +const int BW0_CTRL = 0x40;
159 +const int BW1_CTRL = 0x80;
160 +const int BW2_CTRL = 0xc0;
162 +const int BW_ATRANS_OFFSET = 0x04;
163 +const int BW_ATWAIT_OFFSET = 0x08;
164 +const int BW_AMAX_OFFSET = 0x0c;
165 +const int BW_WTRANS_OFFSET = 0x10;
166 +const int BW_WTWAIT_OFFSET = 0x14;
167 +const int BW_WMAX_OFFSET = 0x18;
168 +const int BW_RTRANS_OFFSET = 0x1c;
169 +const int BW_RTWAIT_OFFSET = 0x20;
170 +const int BW_RMAX_OFFSET = 0x24;
172 +const int BW_CTRL_RESET_BIT = BIT(31);
173 +const int BW_CTRL_ENABLE_BIT = BIT(30);
174 +const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
175 +const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);
177 +const int BW_CTRL_SOURCE_SHIFT = 8;
178 +const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits
179 +const int BW_CTRL_BUS_WATCH_SHIFT;
180 +const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
181 +const int BW_CTRL_BUS_FILTER_SHIFT = 8;
183 +const static char *bus_filter_strings[] = {
218 +const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
220 +const static char *system_bus_string[] = {
239 +const int num_system_buses = ARRAY_SIZE(system_bus_string);
241 +const static char *vpu_bus_string[] = {
260 +const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
262 +const static char *monitor_name[] = {
267 +static inline void write_reg(int monitor, int reg, u32 value)
269 + writel(value, state->monitor[monitor].base_address + reg);
272 +static inline u32 read_reg(int monitor, u32 reg)
274 + return readl(state->monitor[monitor].base_address + reg);
277 +static void read_bus_watcher(int monitor, int watcher, u32 *results)
279 + if (state->monitor[monitor].use_mailbox_interface) {
280 + /* We have 9 results, plus the overheads of start address and
281 + * length, so 11 u32 to define
286 + tmp[0] = (u32)(state->monitor[monitor].base_address + watcher
287 + + BW_ATRANS_OFFSET);
288 + tmp[1] = NUM_BUS_WATCHER_RESULTS;
290 + err = rpi_firmware_property(state->firmware,
291 + RPI_FIRMWARE_GET_PERIPH_REG,
294 + if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS)
295 + dev_err_once(&state->dev->dev,
296 + "Failed to read bus watcher");
298 + memcpy(results, &tmp[2],
299 + NUM_BUS_WATCHER_RESULTS * sizeof(u32));
302 + void __iomem *addr = state->monitor[monitor].base_address
303 + + watcher + BW_ATRANS_OFFSET;
304 + for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
305 + *results++ = readl(addr);
309 +static void set_monitor_control(int monitor, u32 set)
311 + if (state->monitor[monitor].use_mailbox_interface) {
312 + u32 tmp[3] = {(u32)(state->monitor[monitor].base_address +
313 + GEN_CTRL), 1, set};
314 + int err = rpi_firmware_property(state->firmware,
315 + RPI_FIRMWARE_SET_PERIPH_REG,
318 + if (err < 0 || tmp[1] != 1)
319 + dev_err_once(&state->dev->dev,
320 + "Failed to set monitor control");
322 + write_reg(monitor, GEN_CTRL, set);
325 +static void set_bus_watcher_control(int monitor, int watcher, u32 set)
327 + if (state->monitor[monitor].use_mailbox_interface) {
328 + u32 tmp[3] = {(u32)(state->monitor[monitor].base_address +
330 + int err = rpi_firmware_property(state->firmware,
331 + RPI_FIRMWARE_SET_PERIPH_REG,
333 + if (err < 0 || tmp[1] != 1)
334 + dev_err_once(&state->dev->dev,
335 + "Failed to set bus watcher control");
337 + write_reg(monitor, watcher, set);
340 +static void monitor(struct rpi_axiperf *state)
342 + int monitor, num_buses[NUM_MONITORS];
344 + mutex_lock(&state->lock);
346 + for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
347 + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
349 + /* Anything enabled? */
350 + if (mon->bus_enabled == 0) {
351 + /* No, disable all monitoring for this monitor */
352 + set_monitor_control(monitor, GEN_CTL_RESET_BIT);
356 + /* Find out how many buses we want to monitor, and
357 + * spread our 3 bus watchers over them
359 + num_buses[monitor] = hweight32(mon->bus_enabled);
360 + num_buses[monitor] = min(num_buses[monitor],
361 + NUM_BUS_WATCHERS_PER_MONITOR);
363 + for (i = 0; i < num_buses[monitor]; i++) {
367 + mon->last_monitored++;
368 + mon->last_monitored &= 0xf;
369 + } while ((mon->bus_enabled &
370 + (1 << mon->last_monitored)) == 0);
372 + mon->current_bus[i] = mon->last_monitored;
374 + /* Reset the counters */
375 + set_bus_watcher_control(monitor,
378 + BW_CTRL_RESET_BIT);
380 + bus_control = BW_CTRL_ENABLE_BIT |
381 + mon->current_bus[i];
383 + if (mon->bus_filter) {
385 + BW_CTRL_ENABLE_ID_FILTER_BIT;
387 + ((mon->bus_filter & 0x1f)
388 + << BW_CTRL_BUS_FILTER_SHIFT);
392 + set_bus_watcher_control(monitor,
393 + BW0_CTRL + i*BW_PITCH,
398 + /* start monitoring */
399 + set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
402 + mutex_unlock(&state->lock);
404 + msleep(state->sample_time);
406 + /* Now read the results */
408 + mutex_lock(&state->lock);
409 + for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
410 + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
412 + /* Anything enabled? */
413 + if (mon->bus_enabled == 0) {
414 + /* No, disable all monitoring for this monitor */
415 + set_monitor_control(monitor, 0);
419 + for (i = 0; i < num_buses[monitor]; i++) {
420 + int bus = mon->current_bus[i];
422 + read_bus_watcher(monitor,
423 + BW0_CTRL + i*BW_PITCH,
424 + (u32 *)&mon->results[bus].results);
428 + mutex_unlock(&state->lock);
431 +static int monitor_thread(void *data)
433 + struct rpi_axiperf *state = data;
438 + if (kthread_should_stop())
444 +static ssize_t myreader(struct file *fp, char __user *user_buffer,
445 + size_t count, loff_t *position)
447 +#define INIT_BUFF_SIZE 2048
450 + int idx = (int)(fp->private_data);
451 + int num_buses, cnt;
452 + char *string_buffer;
453 + int buff_size = INIT_BUFF_SIZE;
455 + typeof(state->monitor[0]) *mon = &(state->monitor[idx]);
457 + if (idx < 0 || idx > NUM_MONITORS)
460 + num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
462 + string_buffer = kmalloc(buff_size, GFP_KERNEL);
464 + if (!string_buffer) {
465 + dev_err(&state->dev->dev,
466 + "Failed temporary string allocation\n");
472 + mutex_lock(&state->lock);
474 + if (mon->bus_filter) {
475 + int filt = min(mon->bus_filter & 0x1f, num_bus_filters);
477 + cnt = snprintf(p, buff_size,
478 + "\nMonitoring transactions from %s only\n",
479 + bus_filter_strings[filt]);
484 + cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
485 + "======================================================================================================\n");
487 + if (cnt >= buff_size)
493 + for (i = 0; i < num_buses; i++) {
494 + if (mon->bus_enabled & (1 << i)) {
495 +#define DIVIDER (1024)
496 + typeof(mon->results[0]) *res = &(mon->results[i]);
498 + cnt = snprintf(p, buff_size,
499 + "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
500 + idx == SYSTEM_MONITOR ?
501 + system_bus_string[i] :
503 + res->atrans/DIVIDER,
504 + res->atwait/DIVIDER,
506 + res->wtrans/DIVIDER,
507 + res->wtwait/DIVIDER,
509 + res->rtrans/DIVIDER,
510 + res->rtwait/DIVIDER,
513 + if (cnt >= buff_size)
521 + mutex_unlock(&state->lock);
525 + /* Did the last string entry exceed our buffer size? i.e. out of string
526 + * buffer space. Null terminate, use what we have.
528 + if (cnt >= buff_size) {
530 + string_buffer[INIT_BUFF_SIZE] = 0;
533 + cnt = simple_read_from_buffer(user_buffer, count, position,
535 + INIT_BUFF_SIZE - buff_size);
537 + kfree(string_buffer);
542 +static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
543 + size_t count, loff_t *position)
545 + int idx = (int)(fp->private_data);
547 + if (idx < 0 || idx > NUM_MONITORS)
550 + /* At the moment, this does nothing, but in the future it could be
551 + * used to reset counters etc
556 +static const struct file_operations fops_debug = {
559 + .open = simple_open
562 +static int rpi_axiperf_probe(struct platform_device *pdev)
565 + struct device *dev = &pdev->dev;
566 + struct device_node *np = dev->of_node;
567 + struct device_node *fw_node;
569 + state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL);
573 + /* Get the firmware handle for future rpi-firmware-xxx calls */
574 + fw_node = of_parse_phandle(np, "firmware", 0);
576 + dev_err(dev, "Missing firmware node\n");
580 + state->firmware = rpi_firmware_get(fw_node);
581 + if (!state->firmware)
582 + return -EPROBE_DEFER;
584 + /* Special case for the VPU monitor: we must use the mailbox interface
585 + * as it is not accessible from the ARM address space.
587 + state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
588 + state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
590 + for (i = 0; i < NUM_MONITORS; i++) {
591 + if (state->monitor[i].use_mailbox_interface) {
592 + of_property_read_u32_index(np, "reg", i*2,
593 + (u32 *)(&state->monitor[i].base_address));
595 + struct resource *resource =
596 + platform_get_resource(pdev, IORESOURCE_MEM, i);
598 + state->monitor[i].base_address =
599 + devm_ioremap_resource(&pdev->dev, resource);
602 + if (IS_ERR(state->monitor[i].base_address))
603 + return PTR_ERR(state->monitor[i].base_address);
605 + /* Enable all buses by default */
606 + state->monitor[i].bus_enabled = 0xffff;
610 + platform_set_drvdata(pdev, state);
612 + state->sample_time = DEFAULT_SAMPLE_TIME;
614 + /* Set up all the debugfs stuff */
615 + state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
617 + for (i = 0; i < NUM_MONITORS; i++) {
618 + state->monitor[i].debugfs_entry =
619 + debugfs_create_dir(monitor_name[i], state->root_folder);
620 + if (IS_ERR(state->monitor[i].debugfs_entry))
621 + state->monitor[i].debugfs_entry = NULL;
623 + debugfs_create_file("data", 0444,
624 + state->monitor[i].debugfs_entry,
625 + (void *)i, &fops_debug);
626 + debugfs_create_u32("enable", 0644,
627 + state->monitor[i].debugfs_entry,
628 + &state->monitor[i].bus_enabled);
629 + debugfs_create_u32("filter", 0644,
630 + state->monitor[i].debugfs_entry,
631 + &state->monitor[i].bus_filter);
632 + debugfs_create_u32("sample_time", 0644,
633 + state->monitor[i].debugfs_entry,
634 + &state->sample_time);
637 + mutex_init(&state->lock);
639 + state->monitor_thread = kthread_run(monitor_thread, state,
646 +static int rpi_axiperf_remove(struct platform_device *dev)
650 + kthread_stop(state->monitor_thread);
652 + debugfs_remove_recursive(state->root_folder);
653 + state->root_folder = NULL;
658 +static const struct of_device_id rpi_axiperf_match[] = {
660 + .compatible = "brcm,bcm2835-axiperf",
664 +MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
666 +static struct platform_driver rpi_axiperf_driver = {
667 + .probe = rpi_axiperf_probe,
668 + .remove = rpi_axiperf_remove,
670 + .name = "rpi-bcm2835-axiperf",
671 + .of_match_table = of_match_ptr(rpi_axiperf_match),
675 +module_platform_driver(rpi_axiperf_driver);
677 +/* Module information */
678 +MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
679 +MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
680 +MODULE_LICENSE("GPL");