I'm trying to create a C++ program that gets log information from a text file like this:
local - - [24/Oct/1994:13:41:41 -0600] "GET index.html HTTP/1.0" 200 150
local - - [24/Oct/1994:13:41:41 -0600] "GET 1.gif HTTP/1.0" 200 1210
local - - [24/Oct/1994:13:43:13 -0600] "GET index.html HTTP/1.0" 200 3185
local - - [24/Oct/1994:13:43:14 -0600] "GET 2.gif HTTP/1.0" 200 2555
local - - [24/Oct/1994:13:43:15 -0600] "GET 3.gif HTTP/1.0" 200 36403
local - - [24/Oct/1994:13:43:17 -0600] "GET 4.gif HTTP/1.0" 200 441
local - - [24/Oct/1994:13:46:45 -0600] "GET index.html HTTP/1.0" 200 3185
Then I want to extract the file name that follows GET on each line, store it, and count how many times each file name appears in the log file.
After reading the file I print out the top 10 repeated file names.
My problem is that the code below counts every word on every line of the log file, but that's not what I want: I only want to count the file names that appear between GET and HTTP.
#include <math.h>
#include <time.h>
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
// Capacity of the word table: the largest number of distinct words it can hold.
const long MAX{1000000};

// Parallel arrays forming the word table: words[i] is a stored word and
// instances[i] is the occurrence count recorded for it.
std::string words[MAX];
long instances[MAX] = {};

// Number of table slots currently in use (the next free index).
long count{0};
// Records one occurrence of `input` in the global word table.
//
// If `input` is already stored in words[0..count), the matching entry in
// instances[] is incremented. Otherwise the word is appended with a count of
// 1, provided the table still has room (count < MAX). When the table is full
// the word is dropped and a diagnostic is written to std::cerr.
//
// The argument is taken by const reference so that a lookup does not copy the
// string; existing callers passing lvalues or temporaries are unaffected.
void insert(const std::string& input) {
    // Linear scan: bump the counter if the word is already known.
    for (long i = 0; i < count; ++i) {
        if (words[i] == input) {
            ++instances[i];
            return;
        }
    }

    // Unknown word and no free slot left: report it and drop the word.
    if (count >= MAX) {
        std::cerr << "Too many unique words in the file\n";
        return;
    }

    // Unknown word: claim the next free slot.
    words[count] = input;
    instances[count] = 1;
    ++count;
}
// Pulls the most frequent remaining entry out of the word table.
//
// Looks for the largest value in instances[0..count); on ties the lowest
// index wins. The matching word is written to `word`, and that entry's count
// is reset to 0 so that a subsequent call yields the next most frequent word.
// Returns the count the entry held before it was reset.
long findTop(std::string &word) {
    // With an empty range max_element returns `instances` itself, so slot 0
    // is used in that case.
    long *const best = std::max_element(instances, instances + count);
    const long bestCount = *best;

    *best = 0;
    word = words[best - instances];
    return bestCount;
}
// Counts the prime numbers in the closed range [2, n] using trial division.
//
// Returns 0 when n < 2, since that range contains no primes (this also keeps
// the result from going negative for n <= 0).
long frequency_of_primes(long n) {
    if (n < 2) {
        return 0;
    }

    // Start by assuming every candidate in 2..n is prime, then subtract one
    // for each candidate that turns out to have a divisor.
    long freq = n - 1;
    for (long i = 2; i <= n; ++i) {
        // Integer-only bound: d <= i / d is equivalent to d * d <= i but
        // cannot overflow and avoids a floating-point square root.
        for (long d = 2; d <= i / d; ++d) {
            if (i % d == 0) {
                --freq;
                break;
            }
        }
    }
    return freq;
}
int main()
{
std::cout << "Please wait for the result!" << std::endl;
std::string word;
std::ifstream data("Text.txt");
while (data >> word)
insert(word);
long topCount = 0;
for (long i = 0; i<10; i++)
//cout << words[i] << " " << instances[i] << endl;
std::cout << " File Name: " << word << " Visitors #" << findTop(word) << std::endl;
clock_t t;
long f;
t = clock();
printf("Calculating...\n");
f = frequency_of_primes(99999);
printf("The number of primes lower than 100,000 is: %d\n", f);
t = clock() - t;
printf("It took me %d clicks (%f seconds).\n", t, ((float)t) / CLOCKS_PER_SEC);
return 0;
}