I have gone through a number of threads and posts on this topic, but somehow they have not helped me add Unicode support to my code. I have a very simple task to do: - read a Unicode file (.txt or .csv) - parse it and store the words as tokens in a 2D array, using some delimiters (words separated by , or ") - perform some operations on the tokens - store the resulting strings in a text file
The problem I am facing is that some of my older code's functions are apparently not compatible: either I cannot find a substitute for them, or I am able to compile them but no output is generated. This code works perfectly fine with ASCII, but now I need Unicode support for it.
It would be great to get some sample source code. It does not need to be a whole big program, but it should at least show how to read a Unicode file, parse it, store it as tokens, and which functions to use for comparison, etc.
I am pasting part of the code below; I modified a few things, so it may not compile on the first go.
The program gets a text file as input, e.g. profiles.txt, which is in Unicode (UTF-16; it basically contains Chinese and Korean words).
// adding all std headers here
#include <algorithm>
#include <codecvt>
#include <cstddef>
#include <cwctype>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>
// Upper bound on the number of characters handled per input line.
constexpr int MAX_CHARS_PER_LINE = 4072;
// Upper bound on the number of tokens handled per input line.
constexpr int MAX_TOKENS_PER_LINE = 1;
// Set of characters that separate tokens: only the double quote.
constexpr const wchar_t* DELIMITER = L"\"";
// Owns the table of parsed profile rows and the routine that fills it.
// All members are public.
struct IntegrityCheck
{
    // Parses the profile input and fills Profile_Container (defined out of line).
    void Profile_PRD_Parser();

    // Fixed-size table: up to 5000 rows of 4 wide-string columns each.
    std::wstring Profile_Container[5000][4];
};
namespace {

// Returns true when the file at `path` begins with a UTF-16 byte-order mark
// (FF FE for little-endian, FE FF for big-endian). A file that cannot be
// opened, or that is shorter than two bytes, yields false.
bool StartsWithUtf16Bom(const char* path)
{
    std::ifstream probe(path, std::ios::in | std::ios::binary);
    char head[2] = {0, 0};
    probe.read(head, 2);
    if (probe.gcount() != 2)
    {
        return false;
    }
    const unsigned char b0 = static_cast<unsigned char>(head[0]);
    const unsigned char b1 = static_cast<unsigned char>(head[1]);
    return (b0 == 0xFF && b1 == 0xFE) || (b0 == 0xFE && b1 == 0xFF);
}

// Extracts the next token of `line`, scanning from `pos`.
//
// Follows the same rules as wcstok: leading DELIMITER characters are skipped
// and a token is a maximal run of non-delimiter characters, so tokens are
// never empty. On success the token is stored in `token`, `pos` is moved past
// the delimiter that ended it, and true is returned. When no token remains,
// `pos` is set to the end of the line and false is returned.
bool NextProfileToken(const std::wstring& line, std::size_t& pos, std::wstring& token)
{
    const std::size_t first = line.find_first_not_of(DELIMITER, pos);
    if (first == std::wstring::npos)
    {
        pos = line.size();
        return false;
    }
    const std::size_t last = line.find_first_of(DELIMITER, first);
    if (last == std::wstring::npos)
    {
        token = line.substr(first);
        pos = line.size();
    }
    else
    {
        token = line.substr(first, last - first);
        pos = last + 1;
    }
    return true;
}

} // namespace

// Parses "profiles.txt" and records every token that contains ".exe".
//
// Input:  "profiles.txt". If the file starts with a UTF-16 byte-order mark it
//         is decoded as UTF-16 (either endianness); otherwise it is read with
//         the stream's default conversion, as before, so plain ASCII files
//         keep working. Where wchar_t is 16 bits wide the UTF-16 facet yields
//         UCS-2, so characters outside the Basic Multilingual Plane are not
//         supported there.
// Output: "out.txt", encoded as UTF-8 (byte-identical to the previous output
//         for pure-ASCII data). One line per match: columns 0, 1 and 2 of the
//         stored row, separated by tabs.
//
// For each non-blank line the first DELIMITER-separated token is skipped and
// up to MAX_TOKENS_PER_LINE following tokens are examined. A matching token
// is stored in column 0 of the next free row of Profile_Container. Parsing
// stops with a message on std::cerr if the input cannot be opened or the
// table is full.
void IntegrityCheck::Profile_PRD_Parser()
{
    const char* const input_path = "profiles.txt";
    const std::wstring skip(L".exe");
    const std::size_t max_rows = sizeof(Profile_Container) / sizeof(Profile_Container[0]);

    // The conversion facet must be installed before the file is opened.
    // std::locale takes ownership of the facet, so the raw `new` is not a leak.
    std::wifstream fin;
    if (StartsWithUtf16Bom(input_path))
    {
        fin.imbue(std::locale(fin.getloc(),
                              new std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>));
    }
    // Binary mode keeps platform newline translation from corrupting the
    // two-byte code units; a trailing '\r' is stripped per line below.
    fin.open(input_path, std::ios::in | std::ios::binary);

    // Without a facet, any non-ASCII character would put the stream into a
    // failed state and nothing more would be written.
    std::wofstream fout;
    fout.imbue(std::locale(fout.getloc(), new std::codecvt_utf8<wchar_t>));
    fout.open("out.txt"); // this dumps the parsing output

    if (!fin.is_open())
    {
        // The old `while (!fin.eof())` loop never terminated in this case.
        std::cerr << "Profile_PRD_Parser: cannot open " << input_path << '\n';
        return;
    }

    std::size_t next_row = 0;
    std::wstring line;
    std::wstring token;
    // Testing the read itself ends the loop on end-of-file and on any error.
    while (std::getline(fin, line))
    {
        if (!line.empty() && line[line.size() - 1] == L'\r')
        {
            line.erase(line.size() - 1);
        }

        std::size_t cursor = 0;
        if (!NextProfileToken(line, cursor, token))
        {
            continue; // blank line (or delimiters only)
        }

        // The first token is deliberately ignored; only the following ones
        // are examined.
        for (int n = 0; n < MAX_TOKENS_PER_LINE; ++n)
        {
            if (!NextProfileToken(line, cursor, token))
            {
                break; // no more tokens
            }
            if (token.find(skip) == std::wstring::npos)
            {
                continue; // not an executable name
            }
            if (next_row == max_rows)
            {
                std::cerr << "Profile_PRD_Parser: Profile_Container is full, "
                             "remaining input ignored\n";
                return;
            }

            auto& row = Profile_Container[next_row];
            ++next_row;
            row[0] = token;
            // NOTE(review): column 2 is lower-cased, exactly as before, although
            // only column 0 is assigned here - confirm which column was intended.
            // towlower replaces ::tolower, which is not defined for wchar_t
            // values outside the unsigned char range.
            std::transform(row[2].begin(), row[2].end(), row[2].begin(),
                           [](wchar_t ch) { return static_cast<wchar_t>(std::towlower(ch)); });
            fout << row[0] << L'\t' << row[1] << L'\t' << row[2] << L'\n'; // write to file
        }
    }
    // Both streams are closed by their destructors.
}
int main()
{
IntegrityCheck p1;
p1.Profile_PRD_Parser();
}