Perf allows you to use control file descriptors to enable or disable counting. It is documented under perf stat
's man page, but it also works for perf record
.
In short:
- You need to have a file descriptor ready and pass it to both
perf stat
or perf record
(with --control fd:${ctl_fd}
) and your application (argv
, environment variables, or however else your want). When your application decides to have perf start measuring, it sends enable\n
to that file descriptor and disable\n
to stop.
- Optionally, if you want perf to acknowledge that it did what you asked, you need to have a second file descriptor ready and pass both to perf with
--control fd:${ctl_fd},${ctl_fd_ack}
. Perf will respond by writing 5 bytes back (i.e., ack\n
followed by a null-terminator) that your application can read and check.
If the example in the documentation looks cryptic or too terse for you, here is an expanded version (NOTE: this code is intended to be short, it is NOT safe or suitable for production):
#include <assert.h> // assert
#include <stddef.h> // size_t
#include <stdio.h> // printf
#include <stdlib.h> // atoi, getenv
#include <string.h> // strcmp
#include <unistd.h> // read, write
size_t fib(size_t n)
{
if (n == 0)
return 0;
else if (n == 1)
return 1;
else
return fib(n - 1) + fib(n - 2);
}
int main(int argc, char* argv[])
{
int perf_ctl_fd;
int perf_ctl_ack_fd;
char ack[5];
size_t n;
size_t r;
// // Make sure we have the right number of arguments
// if (argc != 3)
// {
// fprintf(stderr, "Received %d arguments, expected 2.\n", argc - 1);
// fprintf(stderr, "Usage: %s <fd1> <fd2>", argv[0]);
// return 1;
// }
// perf_ctl_fd = atoi(argv[1]);
// perf_ctl_ack_fd = atoi(argv[2]);
perf_ctl_fd = atoi(getenv("PERF_CTL_FD"));
perf_ctl_ack_fd = atoi(getenv("PERF_CTL_ACK_FD"));
// scanf("%zu", &n);
n = 45;
// Start the performance counter and read the ack
write(perf_ctl_fd, "enable\n", 8);
read(perf_ctl_ack_fd, ack, 5);
assert(strcmp(ack, "ack\n") == 0);
// Compute the fibonacci number
r = fib(n);
// Stop the performance counter and read the ack
write(perf_ctl_fd, "disable\n", 9);
read(perf_ctl_ack_fd, ack, 5);
assert(strcmp(ack, "ack\n") == 0);
// Print the result
printf("Result: %zu\n", r);
return 0;
}
+ mkfifo ctl_fd.fifo
+ exec {ctl_fd}<>ctl_fd.fifo
+ mkfifo ctl_fd_ack.fifo
+ exec {ctl_fd_ack}<>ctl_fd_ack.fifo
+ echo ctl_fd: $ctl_fd
ctl_fd: 10
+ echo ctrl_fd_ack: $ctl_fd_ack
ctrl_fd_ack: 11
+ PERF_CTL_FD=$ctl_fd PERF_CTL_ACK_FD=$ctl_fd_ack perf stat --delay=-1 --control fd:${ctl_fd},${ctl_fd_ack} -- build/prog
++ PERF_CTL_FD=10
++ PERF_CTL_ACK_FD=11
++ perf stat --delay=-1 --control fd:10,11 -- build/prog
Events disabled
Events enabled
Events disabled
Result: 55
Performance counter stats for 'build/prog':
2,259.30 msec task-clock # 1.000 CPUs utilized
4 context-switches # 1.770 /sec
0 cpu-migrations # 0.000 /sec
0 page-faults # 0.000 /sec
7,162,104,467 cycles # 3.170 GHz
22,618,760,242 instructions # 3.16 insn per cycle
4,023,428,346 branches # 1.781 G/sec
14,050,645 branch-misses # 0.35% of all branches
2.260329786 seconds time elapsed
0.000000000 seconds user
0.000000000 seconds sys
+ exec {ctl_fd}>&-
+ exec {ctl_fd_ack}>&-
+ unlink ctl_fd.fifo
+ unlink ctl_fd_ack.fifo
Same goes with perf record
:
+ mkfifo ctl_fd.fifo
+ exec {ctl_fd}<>ctl_fd.fifo
+ mkfifo ctl_fd_ack.fifo
+ exec {ctl_fd_ack}<>ctl_fd_ack.fifo
+ PERF_CTL_FD=$ctl_fd PERF_CTL_ACK_FD=$ctl_fd_ack perf record --delay=-1 --control fd:${ctl_fd},${ctl_fd_ack} -- build/prog
++ PERF_CTL_FD=10
++ PERF_CTL_ACK_FD=11
++ perf record --delay=-1 --control fd:10,11 -- build/prog
Events disabled
Events enabled
Events disabled
Result: 1134903170
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 0.435 MB perf.data (9053 samples) ]
+ exec {ctl_fd}>&-
+ exec {ctl_fd_ack}>&-
+ perf report
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 9K of event 'cycles'
# Event count (approx.): 7173277849
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. .............................
#
99.99% prog prog [.] fib
0.01% prog [kernel.kallsyms] [k] rcu_check_callbacks
+ unlink ctl_fd.fifo
+ unlink ctl_fd_ack.fifo