0

I've made a program that outputs you the most common word in txt file. And does anybody know how to optimize it that it would work for bigger files and faster.

The output image: https://i.stack.imgur.com/fVBh0.png

Here is the code.

#include <iostream>
#include <string>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include <math.h>

using namespace std;

int main()
{
    ifstream in("file.txt");

    if(!in){
        cerr << "Could not open file.txt.";
        return EXIT_FAILURE;
    }

    string str, str2, strn, tab[10000], tab2[10000];
    int i, k, j, n, l, tabl;
    char c = 179;
    vector<int> tabs;
    vector<string> stringi;

    while(getline(in, str2)){
        str += str2;
        str += ' ';
    }
    k = 0;
    for(i = 0; i < str.length(); i++){
        if(str[i] != ' ' && str[i] != '.' && str[i] != '\t' && str[i] != ','
           && str[i] != ';' && str[i] != ':' && str[i] != '}' && str[i] != '{'){
            tab[k] += tolower(str[i]);
        }else{
            k++;
        }
        if(str[i] == '.' || str[i] == '\t' || str[i] == ',' || str[i] == ';'
        || str[i] == ':' || str[i] == '}' || str[i] == '{') {
            k--;
        }
    }
    tabl = k;

    k = 0;
    for(i = 0; i < tabl; i++){
        for(j = 0; j < tabl; j++){
            if(tab[i] == tab[j]){
                k++;
            }
        }
        tabs.push_back(k);
        k = 0;
    }
    for(i = 0; i < tabl; i++){
        for(j = 0; j < tabl-1; j++){
            if(tab[j] < tab[j+1]){
                n = tabs.at(j);
                tabs.at(j) = tabs.at(j+1);
                tabs.at(j+1) = n;
                strn = tab[j];
                tab[j] = tab[j+1];
                tab[j+1] = strn;
            }
        }
    }
    for(i = 0; i < tabl; i++){
        for(j = 0; j < tabl-1; j++){
            if(tabs.at(j) < tabs.at(j+1)){
                n = tabs.at(j);
                tabs.at(j) = tabs.at(j+1);
                tabs.at(j+1) = n;
                strn = tab[j];
                tab[j] = tab[j+1];
                tab[j+1] = strn;
            }
        }
    }
    tab2[0] = tab[0];
    for(i = 0; i < tabl; i++){
        if(tab[i] != tab[i+1]){
            tab2[i] = tab[i+1];
        }
    }
    k = 1;
    l++;
    for(i = 0; i < tabl; i++){
        if(!tab2[i].empty()){
            l++;
        }
    }
    cout << "------------------------------------" << endl;
    cout << "|--->TABLE OF MOST COMMON WORDS<---|" << endl;
    cout << "------------------------------------" << endl;
    for(i = 0; i < tabl; i++){
        if(!tab2[i].empty() && k <= 20 ){
            cout << c << k++ << "." << '\t' << c << tab2[i] << '\t' << c << "*" <<
            tabs.at(i+1)
            << '\t'  << c << roundf(((float)tabs.at(i+1)*100/l)*100)/100 << "%" <<
            endl;
        }
    }
    cout << "------------------------------------" << endl ;
    cout << "|----->Dif. strings: " << '\t' << l << "<-------|" << endl ;
    cout << "------------------------------------" << endl;

    return 0;
}
klemsi123
  • 103
  • 1
  • 11

1 Answers1

0

Since this doesn't attempt to read the entire file in one piece, the only upper limit on file size is how long you're willing to wait for your output.

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
#include <algorithm>

char convert(char arg)
{
    if (arg == '.' || arg == '\t' || arg == ',' || arg == ';'
            || arg == ':' || arg == '}' || arg == '{')
    {
        return ' ';     // convert delimiters to spaces
    }
    return(tolower(arg));  // convert everything else to lower case
}

int main()
{
    std::ifstream in("c:\\etc\\foo.txt");

    // This can be replaced with std::unordered_map if you are willing to sacrifice
    // lexical sorting of the output for speed
    std::map<std::string, int> counts;

    while (!in.eof())
    {
        std::string str;
        // Get the next space delimited word from the file
        in >> str;

        if (str == "")
        {
            // Reject empty strings, which can occur as we read the file
            continue;
        }

        // Convert to lower case, and convert our delimiter set to spaces
        std::transform(str.begin(), str.end(), str.begin(), convert);
        std::stringstream in1(str);

        while (!in1.eof())
        {
            std::string word;
            in1 >> word;
            if (word == "")
            {
                // reject empty words, which are also possible at this point
                continue;
            }
            // Use the map to count occurrences of the word
            auto it = counts.find(word);
            if (it == counts.end())
            {
                counts[word] = 1;
            }
            else
            {
                it->second++;
            }
        }
    }

    // Output the results here

    return 0;
}
dgnuff
  • 3,195
  • 2
  • 18
  • 32