Have a look at this, it should help you.
main.cpp
#include "stdafx.h"
#include "Utility.h"
int main() {
using namespace util;
std::string fileName( "sample.txt" );
if ( fileName.empty() ) {
std::cout << "Missing or invalid filename." << std::endl;
return RETURN_ERROR;
}
std::string line;
std::vector<std::string> results;
std::fstream fin;
// Try To Open File For Reading
fin.open( fileName.c_str(), std::ios_base::in );
if ( !fin.is_open() ) {
std::cout << "Can not open file(" << fileName << ") for reading." << std::endl;
return RETURN_ERROR;
}
// Read Line By Line To Get Data Contents Store Into String To Be Parsed
while ( !fin.eof() ) {
std::getline( fin, line );
// Parse Each Line Using Space Character As Delimiter
results = Utility::splitString( line, " " );
// Print The Results On Each Iteration Of This While Loop
// This Is Where You Would Parse The Data Or Store Results Into
// Class Objects, Variables Or Structures.
for ( unsigned u = 0; u < results.size(); u++ ) {
std::cout << results[u] << " ";
}
std::cout << std::endl;
}
// Close File Pointer
fin.close();
// Now Print The Full Vector Of Results - This Is To Show You That Each
// New Line Will Be Overwritten And That Only The Last Line Of The File Will
// Be Stored After The While Loop.
std::cout << "\n-------------------------------------\n";
for ( unsigned u = 0; u < results.size(); u++ ) {
std::cout << results[u] << " ";
}
Utility::pressAnyKeyToQuit();
return RETURN_OK;
} // main
sample.txt
Please help me parse this text file
It spans multiple lines of text
I would like to get each individual word
stdafx.h - Some of these include files may not be needed they are here for I have a larger solution that requires them.
#ifndef STDAFX_H
#define STDAFX_H
#include <Windows.h>
#include <stdio.h>
#include <tchar.h>
#include <conio.h>
#include <string>
#include <sstream>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <vector>
#include <array>
#include <memory>
#include <queue>
#include <functional>
#include <algorithm>
// User Application Specific
// #include "ExceptionHandler.h" - One Of My Class Objects Not Used Here
namespace util {
enum ReturnCode {
RETURN_OK = 0,
RETURN_ERROR = 1,
}; // ReturnCode
extern const unsigned INVALID_UNSIGNED;
extern const unsigned INVALID_UNSIGNED_SHORT;
} // namespace util
#endif // STDAFX_H
stdafx.cpp
#include "stdafx.h"
namespace util {
const unsigned INVALID_UNSIGNED = static_cast<const unsigned>( -1 );
const unsigned INVALID_UNSIGNED_SHORT = static_cast<const unsigned short>( -1 );
} // namespace util
Utility.h
#ifndef UTILITY_H
#define UTILITY_H
namespace util {
class Utility {
public:
static void pressAnyKeyToQuit();
static std::string toUpper(const std::string& str);
static std::string toLower(const std::string& str);
static std::string trim(const std::string& str, const std::string elementsToTrim = " \t\n\r");
static unsigned convertToUnsigned(const std::string& str);
static int convertToInt(const std::string& str);
static float convertToFloat(const std::string& str);
static std::vector<std::string> splitString(const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty = true);
private:
Utility(); // Private - Not A Class Object
Utility(const Utility& c); // Not Implemented
Utility& operator=(const Utility& c); // Not Implemented
template<typename T>
static bool stringToValue(const std::string& str, T* pValue, unsigned uNumValues);
template<typename T>
static T getValue(const std::string& str, std::size_t& remainder);
}; // Utility
#include "Utility.inl"
} // namespace util
#endif // UTILITY_H
Utility.inl
// ----------------------------------------------------------------------------
// stringToValue()
template<typename T>
static bool Utility::stringToValue(const std::string& str, T* pValue, unsigned uNumValues) {
int numCommas = std::count(str.begin(), str.end(), ',');
if (numCommas != uNumValues - 1) {
return false;
}
std::size_t remainder;
pValue[0] = getValue<T>(str, remainder);
if (uNumValues == 1) {
if (str.size() != remainder) {
return false;
}
}
else {
std::size_t offset = remainder;
if (str.at(offset) != ',') {
return false;
}
unsigned uLastIdx = uNumValues - 1;
for (unsigned u = 1; u < uNumValues; ++u) {
pValue[u] = getValue<T>(str.substr(++offset), remainder);
offset += remainder;
if ((u < uLastIdx && str.at(offset) != ',') ||
(u == uLastIdx && offset != str.size()))
{
return false;
}
}
}
return true;
} // stringToValue
Utility.cpp
#include "stdafx.h"
#include "Utility.h"
namespace util {
// ----------------------------------------------------------------------------
// pressAnyKeyToQuit()
void Utility::pressAnyKeyToQuit() {
std::cout << "\nPress any key to quit" << std::endl;
_getch();
} // pressAnyKeyToQuit
// ----------------------------------------------------------------------------
// toUpper()
std::string Utility::toUpper( const std::string& str ) {
std::string result = str;
std::transform( str.begin(), str.end(), result.begin(), ::toupper );
return result;
} // toUpper
// ----------------------------------------------------------------------------
// toLower()
std::string Utility::toLower( const std::string& str ) {
std::string result = str;
std::transform( str.begin(), str.end(), result.begin(), ::tolower );
return result;
} // toLower
// ----------------------------------------------------------------------------
// trim()
// Removes Elements To Trim From Left And Right Side Of The str
std::string Utility::trim( const std::string& str, const std::string elementsToTrim ) {
std::basic_string<char>::size_type firstIndex = str.find_first_not_of( elementsToTrim );
if ( firstIndex == std::string::npos ) {
return std::string(); // Nothing Left
}
std::basic_string<char>::size_type lastIndex = str.find_last_not_of( elementsToTrim );
return str.substr( firstIndex, lastIndex - firstIndex + 1 );
} // trim
// ----------------------------------------------------------------------------
// getValue()
template<>
float Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stof( str, &remainder );
} // getValue <float>
// ----------------------------------------------------------------------------
// getValue()
template<>
int Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoi( str, &remainder );
} // getValue <int>
// ----------------------------------------------------------------------------
// getValue()
template<>
unsigned Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoul( str, &remainder );
} // getValue <unsigned>
// ----------------------------------------------------------------------------
// convertToUnsigned()
unsigned Utility::convertToUnsigned( const std::string& str ) {
unsigned u = 0;
if ( !stringToValue( str, &u, 1 ) ) {
std::ostringstream strStream;
strStream << __FUNCTION__ << " Bad conversion of [" << str << "] to unsigned";
throw strStream.str();
}
return u;
} // convertToUnsigned
// ----------------------------------------------------------------------------
// convertToInt()
int Utility::convertToInt( const std::string& str ) {
int i = 0;
if ( !stringToValue( str, &i, 1 ) ) {
std::ostringstream strStream;
strStream << __FUNCTION__ << " Bad conversion of [" << str << "] to int";
throw strStream.str();
}
return i;
} // convertToInt
// ----------------------------------------------------------------------------
// convertToFloat()
float Utility::convertToFloat(const std::string& str) {
float f = 0;
if (!stringToValue(str, &f, 1)) {
std::ostringstream strStream;
strStream << __FUNCTION__ << " Bad conversion of [" << str << "] to float";
throw strStream.str();
}
return f;
} // convertToFloat
// ----------------------------------------------------------------------------
// splitString()
std::vector<std::string> Utility::splitString( const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty ) {
std::vector<std::string> vResult;
if ( strDelimiter.empty() ) {
vResult.push_back( strStringToSplit );
return vResult;
}
std::string::const_iterator itSubStrStart = strStringToSplit.begin(), itSubStrEnd;
while ( true ) {
itSubStrEnd = search( itSubStrStart, strStringToSplit.end(), strDelimiter.begin(), strDelimiter.end() );
std::string strTemp( itSubStrStart, itSubStrEnd );
if ( keepEmpty || !strTemp.empty() ) {
vResult.push_back( strTemp );
}
if ( itSubStrEnd == strStringToSplit.end() ) {
break;
}
itSubStrStart = itSubStrEnd + strDelimiter.size();
}
return vResult;
} // splitString
} // namspace util
In my small utility library I have a function that will split a string that can use any delimiter that the user defines. It will search for the first occurrence of that character delimiter and it will save everything before it into a string and it will push that string into a vector of strings, and it will continue this for every occurrence of that character until it is finished parsing the full string that is passed to it. It will then return a vector of strings back to the user. This is very helpful when engaged in parsing text files or even just data types with long strings that need to be broken down. Now if there is a case where you are parsing a text file and lets say you need to have more than one word as a single string, this can be done but requires more work on your part. For example a text file might have personal record on a single line.
LastName, FirstName MiddleInitial Age Phone# Address
Cook, John S 33 1-888-323-4545 324 Complex Avenue
And you would want the 324 Complex Avenue to be in a single string also you don't want the comma stored after the last name. Your structure in code to store this info might look like this:
struct PersonalRecord {
std::string firstName;
std::string lastName;
char middleInitial;
unsigned age;
std::string phoneNumber;
std:string address;
};
What you would have to do is after you read this line in from your file on that same iteration of the while loop is you would have to do multiple parsing.
You would first start by using a temporary string and vector of strings and use the utility function splitString with the delimeter being the comma. So this would save 2 strings in the temp vector of strings the first being: Cook and the second being the rest of the line after the comma including the leading space. The reason you have the temp string and temp vector of strings is that you will need to pop values at when needed. So in this case we would have to do the following, but first how do we resolve the case with multiple words to one string? We can change the line of text in the text file to be enclosed with double quotes as such:
textfile
Cook, John S 33 1-888-323-4545 "324 Complex Avenue"
Evens, Sue A 24 1-888-323-6996 "128 Mission Rd"
Adams, Chris B 49 1-777-293-8234 "2304 Helms Drive"
Then parse it with this logic flow or algorithm.
main.cpp
#including "stdafx.h"
#including "Utility.h"
int main() {
using namespace util;
std::string strFilename( "personalRecord.txt" );
std::ifstream file;
std::string strLine;
std::vector<std::string> vTemp;
std::vector<std::string> vResult;
std::vector<PersonalRecord> vData;
// Open File For Reading
file.open( strFilename.c_str() );
// Check For Error Of Opening File
if ( !file.is_open() ) {
std::cout << "Error opening file (" << strFilename << ")" << std::endl;
return RETURN_ERROR;
}
// Continue Until End Of File
while( !file.eof() ) {
// Get Single Full Line Save To String
std::getline( file, strLine );
// Check For Comma
vTemp = Utility::splitString( strLine, ",");
// Save First String For Laster
std::string lastName = vTemp[0];
// Split String Using A Double Quote Delimiter Delimiter
vTemp = Utility::splitString( vTemp[1], "\"" );
// Check To See If vTemp Has More Than One String
if ( vTemp.size() > 1 ) {
// We Need To Use Pop Back To Account For Last Double Quote
vTemp.pop_back(); // Remove Last Double Quote
std::string temp = vTemp.back();
vTemp.pop_back(); // Remove Wanted String From vTemp.
// At This Point We Need To Parse vTemp Again Using Space Delimiter
vResult = Utility::splitString( vTemp[0], " " );
// Need To Account For Leading Space In Vector
vResult[0].erase();
// Need To Account For Last Space In Vector
vResult.pop_back();
// Now We Can Push Our Last String Back Into vResult
vResult.push_back( temp );
// Replace The First String " " With Our LastName
vResult[0] = lastName;
} else if ( vTemp.size() == 1 ) {
// Just Parse vTemp Using Space Delimiter
vResult = Utility::splitString( vTemp[0], " " );
}
// Print Out Results For Validity
for ( unsigned u = 0; u < vResult.size(); u++) {
std::cout << vResult.at(u) << " ";
}
std::cout << std::endl;
// Here Is Where You Would Populate Your Variables, Structures Or Classes On Each Pass Of The While Loop.
// With This Structure There Should Only Be 8 Entries Into Our vResult
PersonalRecord temp;
temp.lastName = vResult[0];
temp.firstName = vResult[1];
temp.middleInitial = vResult[2][0];
temp.age = Utility::convertToUnsigned( vResult[3] );
temp.phoneNumber = vResult[4];
temp.address = vResult[5];
vData.push_back( temp );
} // while
// Close File
file.close();
std::cout << std::endl << std::endl;
// Print Using Structure For Validity
std::cout << "---------------------------------------\n";
for ( unsigned u = 0; u < vData.size(); u++ ) {
std::cout << vData[u].lastName << " "
<< vData[u].firstName << " "
<< vData[u].middleInitial << " "
<< vData[u].age << " "
<< vData[u].phoneNumber << " "
<< vData[u].address << std::endl;
}
Utility::pressAnyKeyToQuit();
return RETURN_OK;
} // main
So both consideration and are has to be taken when parsing text or strings. You have to account for every single character including your carriage returns, spaces etc. So the format that the text file is written in has to be considered.
Yes the splitString()
will also parse tabs, you would just have to use "\t" for tabs, etc. Just remember that it will make a split at every occurrence. So if you have a sentence that has a colon ":" in it, but then you decide to use the colon as your delimiter between values, it will split that sentence as well. Now you could have different rules for each line of text from the file and if you know what line you are on you can parse each line accordingly. This is why most people prefer to write their code to read and parse binary, because it is much easier to program, then writing a text parser.
I chose to use the PersonalRecord structure to show you how you can extract strings from a line of text and to convert them to basic types such as int, float or double by using some of my other functions in my Utility class. All methods in this class are declared as static and the constructor is private, so the class name acts as a wrapper or a namespace so to speak. You can not create an instance of a Utility util; // invalid object
. Just include the header file and use the class name with the scope resolution operator ::
to access any of the functions and make sure you are using the namespace util
.