1

I would like to dedicate a thread in my program to gathering metrics on its performance. Memory usage, CPU etc. I've been trying to do this using the /proc/stat and /proc/pid/stat files. I'm currently stuck at trying to measure the %CPU usage. The values reported by my program are totally out of line with what 'top' is reporting. I'm tried this on a few different linux distros and am seeing the same results on each.

Here is the code I am using to calculate the percentage. Can anyone spot any issues here?

https://github.com/mmcilroy/cpu_usage

#include <stdlib.h>
#include <sys/types.h>
#include <sys/times.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct pstat {
    long unsigned int utime_ticks;
    long int cutime_ticks;
    long unsigned int stime_ticks;
    long int cstime_ticks;
    long unsigned int vsize; // virtual memory size in bytes
    long unsigned int rss; //Resident  Set  Size in bytes
    long unsigned int cpu_total_time;
};

int get_usage(const pid_t pid, struct pstat* result) {

    //convert  pid to string
    char pid_s[20];
    snprintf(pid_s, sizeof(pid_s), "%d", pid);

    char stat_filepath[30] = "/proc/"; strncat(stat_filepath, pid_s,
            sizeof(stat_filepath) - strlen(stat_filepath) -1);
    strncat(stat_filepath, "/stat", sizeof(stat_filepath) -
            strlen(stat_filepath) -1);

    FILE *fpstat = fopen(stat_filepath, "r");
    if (fpstat == NULL) {
        perror("FOPEN ERROR ");
        return -1;
    }

    FILE *fstat = fopen("/proc/stat", "r");
    if (fstat == NULL) {
        perror("FOPEN ERROR ");
        fclose(fstat);
        return -1;
    }

    //read values from /proc/pid/stat
    bzero(result, sizeof(struct pstat));
    long int rss;
    if (fscanf(fpstat, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu"
                "%lu %ld %ld %*d %*d %*d %*d %*u %lu %ld",
                &result->utime_ticks, &result->stime_ticks,
                &result->cutime_ticks, &result->cstime_ticks, &result->vsize,
                &rss) == EOF) {
        fclose(fpstat);
        return -1;
    }
    fclose(fpstat);
    result->rss = rss * getpagesize();

    //read+calc cpu total time from /proc/stat
    long unsigned int cpu_time[10];
    bzero(cpu_time, sizeof(cpu_time));
    if (fscanf(fstat, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                &cpu_time[0], &cpu_time[1], &cpu_time[2], &cpu_time[3],
                &cpu_time[4], &cpu_time[5], &cpu_time[6], &cpu_time[7],
                &cpu_time[8], &cpu_time[9]) == EOF) {
        fclose(fstat);
        return -1;
    }

    fclose(fstat);

    for(int i=0; i < 4;i++)
        result->cpu_total_time += cpu_time[i];

    printf( "usage: cpu %lu, utime %lu, stime %lu\n", result->cpu_total_time, result->utime_ticks, result->stime_ticks );

    return 0;
}

void calc_cpu_usage_pct(const struct pstat* cur_usage,
                        const struct pstat* last_usage,
                        double* usage)
{
    printf( "delta: cpu %lu, utime %lu, stime %lu\n",
        cur_usage->cpu_total_time - last_usage->cpu_total_time,
        cur_usage->utime_ticks - last_usage->utime_ticks,
        cur_usage->stime_ticks - last_usage->stime_ticks );

    const long unsigned int cpu_diff = cur_usage->cpu_total_time - last_usage->cpu_total_time;
    const long unsigned int pid_diff =
        ( cur_usage->utime_ticks + cur_usage->utime_ticks + cur_usage->stime_ticks - cur_usage->stime_ticks ) -
        ( last_usage->utime_ticks + last_usage->utime_ticks + last_usage->stime_ticks - last_usage->stime_ticks );

    *usage = 100.0 * ( (double)pid_diff / (double)cpu_diff );
}

int main( int argc, char* argv[] )
{
    pstat prev, curr;
    double pct;

    struct tms t;
    times( &t );

    if( argc <= 1 ) {
        printf( "please supply a pid\n" ); return 1;
    }

    while( 1 )
    {
        if( get_usage(atoi(argv[1]), &prev) == -1 ) {
            printf( "error\n" );
        }

        sleep( 5 );

        if( get_usage(atoi(argv[1]), &curr) == -1 ) {
            printf( "error\n" );
        }

        calc_cpu_usage_pct(&curr, &prev, &pct);

        printf("%%cpu: %.02f\n", pct);
    }
}

If you want to try it out for yourself, the program expect 1 arguments - the pid of a process to monitor

sr01853
  • 6,043
  • 1
  • 19
  • 39
user1664098
  • 143
  • 1
  • 3
  • 7
  • So, post your code that does the calculation directly, rather than as a link. Surely it's not much more than 30-40 lines? – Mats Petersson Apr 15 '13 at 09:25
  • Denote cur_usage->utime_ticks as "a", cur_usage->stime_ticks as "b", last_usage->utime_ticks as "c", and last_usage->stime_ticks as "d". Then you calculate pid_diff as "(a+a+b-b) - (c+c+d-d)", which is 2a-2c. Not sure whether "b" and "d" should be taken into account, but isn't the factor 2 here is suspicious? – Nitzan Shaked Apr 15 '13 at 12:06
  • Yes you're right. That's a mistake in the code. Still doesn't work as I would expect even after I fixed it though :) – user1664098 Apr 15 '13 at 13:02
  • Do you know about `getrusage(2)` and `times(2)`? More portable *and* easier to access. – Ben Jackson Sep 03 '13 at 22:18

5 Answers5

4

I know this is a bit old but I can explain why your new equation works: (1/INTERVAL) * (pid diff)

It's just a simplification of the basic percentage equation 100 * (pid diff) / (cpu diff), which looks like what you were trying to do in your first example.

The cpu time in /proc/stat (and the utime and stime in /proc/pid/stat) is reported in USER_HZ (or jiffies). This value is usually 1/100 of a second. This means that there will be 100 "tics" in each second for the CPU, which means your "CPU diff" will be INTERVAL*100.

Substitute that in and you get:

100 * (pid diff) / (INTERVAL * 100)

Cancel out the 100's and you are left with:

(pid diff) / INTERVAL

Which is the same as what you are now using. This also means that if you did indeed correct the problems you have in the code at the top, then that should work as well. The pid diff should be (curr utime + curr stime) - (prev utime + prev stime). If it doesn't work, then perhaps the way you are adding up the CPU time is wrong? It'd be easy to test because you know what value it should be (INTERVAL*100).

Since you now have a working equation, you may not care to figure out the problem with the original code but keep in mind that if you ever try to use it on a system where USER_HZ is not 1/100, the equation will be invalid.

totymedli
  • 29,531
  • 22
  • 131
  • 165
Sophia
  • 41
  • 2
2

I examined the source for top (from procps). Seems it is essentially performing the following calculation...

(1/interval) * (utime+stime)

Where interval it the number of seconds between samples. utime / stime are read directly from /proc/pid/stat

I must admit I don't understand why this works (it shouldn't according to "man proc"), but I've tested this with numerous different scenarios and the output from my program always matches that of "top".

Would be interested to hear some feedback on why this works :)

Here's my latest source

#include <stdlib.h>
#include <sys/types.h>
#include <sys/times.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define INTERVAL 3

struct pstat {
    long unsigned int utime_ticks;
    long int cutime_ticks;
    long unsigned int stime_ticks;
    long int cstime_ticks;
    long unsigned int vsize; // virtual memory size in bytes
    long unsigned int rss; //Resident  Set  Size in bytes
};

int get_usage(const pid_t pid, struct pstat* result) {

    //convert  pid to string
    char pid_s[20];
    snprintf(pid_s, sizeof(pid_s), "%d", pid);

    char stat_filepath[30] = "/proc/"; strncat(stat_filepath, pid_s,
            sizeof(stat_filepath) - strlen(stat_filepath) -1);
    strncat(stat_filepath, "/stat", sizeof(stat_filepath) -
            strlen(stat_filepath) -1);

    FILE *fpstat = fopen(stat_filepath, "r");
    if (fpstat == NULL) {
        perror("FOPEN ERROR ");
        return -1;
    }

    //read values from /proc/pid/stat
    bzero(result, sizeof(struct pstat));
    long int rss;
    if (fscanf(fpstat, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu"
                "%lu %ld %ld %*d %*d %*d %*d %*u %lu %ld",
                &result->utime_ticks, &result->stime_ticks,
                &result->cutime_ticks, &result->cstime_ticks, &result->vsize,
                &rss) == EOF) {
        fclose(fpstat);
        return -1;
    }
    fclose(fpstat);
    result->rss = rss * getpagesize();

    return 0;
}

void calc_cpu_usage_pct(const struct pstat* cur_usage,
                        const struct pstat* last_usage,
                        double* usage)
{
    const long unsigned int pid_diff =
        ( cur_usage->utime_ticks + cur_usage->stime_ticks ) -
        ( last_usage->utime_ticks + last_usage->stime_ticks );

    printf( "delta %lu\n", pid_diff );

    *usage = 1/(float)INTERVAL * pid_diff;
}

int main( int argc, char* argv[] )
{
    pstat prev, curr;
    double pct;

    struct tms t;
    times( &t );

    if( argc <= 1 ) {
        printf( "please supply a pid\n" ); return 1;
    }

    while( 1 )
    {
        if( get_usage(atoi(argv[1]), &prev) == -1 ) {
            printf( "error\n" );
        }

        sleep( INTERVAL );

        if( get_usage(atoi(argv[1]), &curr) == -1 ) {
            printf( "error\n" );
        }

        calc_cpu_usage_pct(&curr, &prev, &pct);

        printf("%%cpu: %.02f\n", pct);
    }
}
user1664098
  • 143
  • 1
  • 3
  • 7
0

This command in linux may be useful for linux.

# apt-get install sysstat
# up2date sysstat
# mpstat 

Now you find how to find how to get command line output as string and parse. You can also use different parameters of mpstat. Also try $ top.

Get help from this link.

Community
  • 1
  • 1
pcbabu
  • 2,219
  • 4
  • 22
  • 32
0

The main loop is somewhat off: instead of getting "prev", then sleeping, then getting "next" and calculating the difference, you should get "prev" once outside the loop, and inside the loop get "curr", calculate, the copy "curr" into "prev" and then loop again. This fixes the part where 50% of the used time is not counted.

Nitzan Shaked
  • 13,460
  • 5
  • 45
  • 54
  • Sorry I don't follow. Isn't this essentially the same as what I'm doing now? I'm sampling the cpu / process usage at 5 second intervals and calculating the % based on the difference between these 2 samples – user1664098 Apr 15 '13 at 10:14
  • True. Just a matter of style. I now realize I should have wrote this as a comment, not an answer. My bad. – Nitzan Shaked Apr 15 '13 at 12:04
0

try seeing the top command source code , source code will be available in busybox

EDIT: replace mpstat with top as mpstat shows overall usage

John
  • 449
  • 5
  • 12
  • Unless I'm mistaken mpstat measures only the % usage of each cpu, not the % cpu usage of each process. To do this it needs only look at /proc/stat – user1664098 Apr 15 '13 at 10:26
  • sorry about that, i have edited the comment, thanks. Please take a look at busybox code of finding cpu usage of each process (grep for the "/proc/stat" in busybox directory) – John Apr 15 '13 at 10:41