4

sometimes when you copy code from a document it gets line numbers and strange quotes. I've written a script to remove those initial numbers but it is very hard to find a way to remove those strange quotes ‘’“” so I've included my full code. It reads in a file and puts out a formatted file. But the compiler warns that these quotes are multi characters, which I guess means non standard ascii chars. It kinda works but it's not a great solution. Any help appreciated:

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

string replaceChar(string str, char ch1, char ch2);

// Main
int main(int argc, char *argv[]) {

    string line;

    fstream stri, stro;
    // ifstream in
    stri.open(argv[1], ios::in);
    if(stri.fail()){
        cerr << "File failed to open for input" << endl;
        return 1;
    }

    // ofstream out
    stro.open("file_out.txt", ios::out);
    if(stro.fail()){
        cerr << "File failed to open for output" << endl;
        return 1;
    }

    // Read - Write
    //stri.get(c);
    getline(stri, line, '\n');
    while(!stri.eof()){
        // Remove numbers
        line.erase(0,3);

        //line.replace( line.begin(), line.end(), "‘", "\'" );
        //line.replace( line.begin(), line.end(), "’", "\'" );
        //line.replace( line.begin(), line.end(), "“", "\'" );
        //line.replace( line.begin(), line.end(), "”", "\'" );
        line = replaceChar(line, '‘','\'');
        line = replaceChar(line, '’','\'');
        line = replaceChar(line, '“','\"');
        line = replaceChar(line, '”','\"');

        stro << line << endl;
        getline(stri, line, '\n');
    }

    // Close files
    stri.close();
    stro.close();

    // Output
    cout << "File Edited Ok!";
    //cout << count -1 << " characters copied."<< endl; 
}

string replaceChar(string str, char ch1, char ch2) {
  for (int i = 0; i < str.length(); ++i) {
    if (str[i] == ch1)
      str[i] = ch2;
  }

  return str;
}
nif
  • 3,342
  • 20
  • 18
Conor Ryan
  • 159
  • 1
  • 2
  • 12
  • 1
    C++11 does have unicode support. Have a look at that. A starting point: http://stackoverflow.com/questions/6796157/unicode-encoding-for-string-literals-in-c11 – Johan Lundberg Jun 30 '13 at 10:53
  • I saw `while (!eof)` an stopped reading... – Kerrek SB Jun 30 '13 at 11:17
  • it's done right in this code, but it's a common error: eof is only set after you read behind eof in C++, so many people end up with one invalid data item. In your code, you could just use while(std::getline(stri, line)) and remove the two other getline calls. – bennofs Jun 30 '13 at 11:30
  • Yes, I will try while(std::getline(stri, line)) as it looks neater, thanks! But back to the actual problem, ha ha... – Conor Ryan Jun 30 '13 at 11:36
  • A char usually holds an 8-bit value. The warning is saying those characters are not in the 8-bit character set. Type char promotes to type int, so there is usually no loss of bits. But if you are doing string searches you need special code to handle non-ASCII characters. – brian beuning Jun 30 '13 at 12:40
  • I solved this problem in PHP but would be interested if anyone can solve it on C or C++ – Conor Ryan Jun 30 '13 at 12:58
  • Thanks Brian, I thought as much. As I said I rewrote this in PHP and solved it, but am intrigued to see a solution in C++. – Conor Ryan Jun 30 '13 at 13:01

2 Answers2

3

Ok, it ain't pretty, but it works. Anyone want to refine searching for one of those damned strange quote marks be my guest!

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

// Function Declaration
bool replace(string& str, const string& from, const string& to);

bool checkMyLine(string line);

// Main
int main(int argc, char *argv[]) {

    // line to edit
    string line;

    fstream stri, stro;
    // ifstream in
    stri.open(argv[1], ios::in);
    if(stri.fail()){
        cerr << "File failed to open for input" << endl;
        return 1;
    }

    // ofstream out
    stro.open("file_out.txt", ios::out);
    if(stro.fail()){
        cerr << "File failed to open for output" << endl;
        return 1;
    }

    // Read - Write
    while(getline(stri, line, '\n')){

        // Remove numbers at start of each line followed by space, eg: "001: "
    int i;
    for(i = 0;i < line.length();i++)
    {
        if(line[i] == ' ') break;
    }
    line.erase(0,i+1);

        //Replace Odd Chars
        for(int i=0;i<line.length();i++)
        {
        replace(line, "\u2018","\u0027");   // replaces ‘
        replace(line, "\u2019","\u0027");   // replaces ’
        replace(line, "\u201C","\u0022");   // replaces “
        replace(line, "\u201D","\u0022");   // replaces ”
        }

        // Write to file
        stro << line << endl;
    }

    // Close files
    stri.close();
    stro.close();

    // Output Message
    cout << "File Edited Ok!";
}// End of Main
//
bool replace(string& str, const string& from, const string& to) 
{
    size_t start_pos = str.find(from);
    if(start_pos == string::npos)
        return false;
    str.replace(start_pos, from.length(), to);
    return true;
}
Conor Ryan
  • 159
  • 1
  • 2
  • 12
0

What kind of script did you write to remove the leading numbers? Do you have access to sed or tr? They exist for just this kind of problem.

sed -e 's/[‘’“”]//g'

No need to re-invent the wheel

LogicG8
  • 1,767
  • 16
  • 26