I have been working on a program that reads through multiple text files, records the number of words in them, and writes all of the words and their frequencies to a file. However, I have encountered a segmentation fault somewhere in my code. I have tried using tools such as Valgrind to help me debug it, but it only points to the line where I declare int i = 0
in the main function. I apologize for posting a large portion of my code, but I have spent hours trying to find the bug and cannot find it for the life of me. The issues began when I started passing a structure to pthread_exit().
#include <iostream>
#include <fstream>
#include <string>
#include <pthread.h>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iterator>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <cstdio>
using namespace std;
// Result record handed from a worker thread back to main.
//   words      - an int count describing the file that was processed
//   dictionary - fixed array of 500000 string slots for the collected words
// The embedded array makes one instance many megabytes in size, so keep
// instances off the stack (allocate dynamically or give them static storage).
struct info {
    int words;
    string dictionary[500000];
};
// Counts the whitespace-separated words in a text file and prints the total.
//
// arg    - path of the file to read.
// return - number of words read; 0 when the file is empty or cannot be opened.
//
// Side effect: writes "Word Count: <n>" followed by a newline to stdout.
int countWord(char *arg){
    std::ifstream check(arg);
    int count = 0;

    // Drive the loop with the extraction itself rather than with eof():
    //  - a failed read is never counted, so trailing whitespace does not add
    //    a phantom word;
    //  - a stream that failed to open ends the loop at once instead of
    //    spinning forever;
    //  - reading into std::string removes the fixed-size buffer that an
    //    oversized token could overrun.
    std::string word;
    while(check >> word){
        ++count;
    }

    std::cout << "Word Count: " << count << '\n';
    return count;  // the stream closes itself when it goes out of scope
}
// Reports whether target occurs among the first wordCount entries of array.
// Returns 1 on the first exact match and 0 when there is none (including
// when wordCount is zero or negative).
int findWord(string array[], string target, int wordCount){
    int slot = 0;
    // Advance until the range is exhausted or the current entry matches.
    while(slot < wordCount && !(array[slot] == target)){
        ++slot;
    }
    return (slot < wordCount) ? 1 : 0;
}
// Tallies how many of the first wordCount entries of array equal target.
// Returns 0 when wordCount is zero or negative.
int checkWord(string array[], string target, int wordCount){
    if(wordCount <= 0){
        return 0;  // nothing to inspect
    }
    string *const first = array;
    string *const last = array + wordCount;
    return static_cast<int>(std::count(first, last, target));
}
void *threads(void *arg){
info information;
char *fileName = (char *)arg;
ifstream myfile (fileName);
string line;
string fullText[15000];
string dictionary[500000];
int wordCount = countWord(fileName);
int i = 0;
int find;
int check;
int x = 0;
int checkingStart = 0;
// Opens and reads the file word by word removing any symbols that we dislike
if (myfile.is_open()){
while(myfile >> line){
transform(line.begin(), line.end(), line.begin(), ::tolower);
line.erase(remove(line.begin(), line.end(), ','), line.end());
fullText[i] = line;
i++;
}
}
else cout << "Unable to Open the File";
myfile.close();
// Goes through and adds all the words to our dictionary
for(i = 0; i < wordCount; ++i){
find = findWord(dictionary, fullText[i], wordCount);
if(find == 0){
dictionary[x] = {fullText[i]};
++x;
checkingStart = 1;
}
}
// Sets each section of dictionary equal to the one in the structure
for(i = 0; i < wordCount; ++i){
information.dictionary[i] = dictionary[i];
}
// Sets words equal to word count and then passes the structure information out of the thread
information.words = wordCount;
pthread_exit(&information);
return NULL;
}
int main(){
int i = 0;
int x = 0;
int y = 0;
int z = 0;
int a = 0;
int b = 0;
int add = 0;
int currentSize = 0;
int checkingStart = 0;
int wordCount;
int find;
string fullDictionary[500000];
string dict[500000];
ofstream writeFile;
info information;
char *fileName;
char *fileList[2];
pthread_t threadCount[2];
int frequency[500000];
int check;
fileList[0] = "text1";
fileList[1] = "text2";
// Creates a loop that creates and joins threads for each text file
for(a = 0; a < 1; ++a){
fileName = fileList[a];
pthread_create(&threadCount[a], NULL, threads, &fileName);
pthread_join(threadCount[a], (void **)&information);
wordCount = information.words;
// Sets each part of dict equal to the same slot on info.dict
for(b = 0; b < wordCount; ++b){
dict[b] = information.dictionary[b];
}
// Adds to a complete list of all the text files added together
for(y = 0, z = currentSize; z < wordCount; ++z, ++y){
fullDictionary[z] = dict[y];
}
currentSize = (currentSize + wordCount);
}
// Goes through and adds all the words to our dictionary
for(i = 0; i < wordCount; ++i){
find = findWord(dict, fullDictionary[i], currentSize);
if(find == 0){
dict[x] = {fullDictionary[i]};
cout << "Added the Word: " << fullDictionary[i] << "\n";
add = 1;
checkingStart = 1;
}
// Checks the number of times each word appears in the text file
if(checkingStart == 1){
check = checkWord(fullDictionary, dict[x], wordCount);
frequency[x] = {check};
}
// Checks to see if it needs to move to the next open dictionary spot
if(add == 1){
++x;
add = 0;
}
}
return 0;
}