2

So we have the following function:

// Percent-encodes a wide string for use in a URL.
//
// The input is first converted to UTF-8 with the Win32 WideCharToMultiByte
// API, then EVERY resulting byte is emitted as "%XX" (upper-case hex) --
// no characters are left unescaped.
//
// input:   wide string to encode; processing stops at the first NUL character
//          because the string is handed over as a NUL-terminated C string.
// returns: the percent-encoded text, or an empty string if the input is empty
//          or the conversion fails.
std::string url_encode_wstring(const std::wstring &input)
{
    std::string output;

    // First pass: passing no output buffer makes the call report the number of
    // bytes required. Because the source length is given as -1, that count
    // includes the terminating NUL.
    const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, NULL, 0, NULL, NULL);
    if (cbNeeded > 0) {
        // A std::string owns the scratch buffer, so it is released even when
        // output.append() below throws (the former new[]/delete[] pair leaked).
        std::string utf8(static_cast<std::string::size_type>(cbNeeded), '\0');

        // Second pass: perform the actual conversion into the scratch buffer.
        if (WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, &utf8[0], cbNeeded, NULL, NULL) != 0) {
            // Walk the converted bytes up to the terminating NUL.
            for (const char *p = utf8.c_str(); *p; ++p) {
                char onehex[5];
                // The cast to unsigned char keeps bytes >= 0x80 from being
                // sign-extended into a longer hex sequence.
                _snprintf(onehex, sizeof(onehex), "%%%02.2X", (unsigned char)*p);
                output.append(onehex);
            }
        }
    }
    return output;
}

It's great for Windows, but I wonder how (and whether it is even possible) to make it work under Linux?

Community
  • 1
  • 1
Rella
  • 65,003
  • 109
  • 363
  • 636
  • 1
    Use `wcstombs` and `mbstowcs`. Take a look [at this answer](http://stackoverflow.com/questions/7141260/compare-stdwstring-and-stdstring/7159944#7159944) for some example code. – Kerrek SB Aug 26 '11 at 20:59

1 Answers1

3

IMHO you should use a portable character codec library. Here's an example of minimal portable code using iconv, which should be more than enough. It's supposed to work on Windows too (if it does, you can get rid of your Windows-specific code altogether). I follow the GNU guidelines not to use the wcstombs & co. functions ( https://www.gnu.org/s/hello/manual/libc/iconv-Examples.html ). Depending on the use case, handle errors appropriately, and to enhance performance you can create a class out of it.

#include <iostream>

#include <iconv.h>
#include <cerrno>
#include <cstring>
#include <stdexcept>

// Converts a wide string to a UTF-8 encoded std::string using iconv.
//
// input:   wide-character text; its raw bytes are passed to iconv as the
//          "wchar_t" source encoding.
// returns: the UTF-8 bytes produced by the conversion. The length is taken
//          from the number of bytes iconv actually wrote, so an empty input
//          yields an empty string and embedded NUL characters are preserved.
// throws:  std::runtime_error if the converter cannot be opened, if the
//          conversion fails, or if the converter cannot be closed.
std::string wstring_to_utf8_string(const std::wstring &input)
{
    // iconv() takes a non-const char** for its input even though it only reads it.
    char *in_buf = const_cast<char *>(reinterpret_cast<const char *>(input.data()));
    size_t in_size = input.length() * sizeof(wchar_t);

    // Pessimistic sizing: allow 6 output bytes per input character.
    // The buffer is allocated BEFORE the descriptor is opened so that nothing
    // can throw while the descriptor is open, and it is owned by a std::string
    // so it cannot leak on the error paths below.
    std::string utf8(input.length() * 6, '\0');
    char *out_buf = &utf8[0];
    size_t out_size = utf8.size();

    iconv_t conv_desc = iconv_open("UTF-8", "wchar_t");
    if (conv_desc == iconv_t(-1))
        throw std::runtime_error(std::string("Could not open iconv: ") + strerror(errno));

    const size_t iconv_value = iconv(conv_desc, &in_buf, &in_size, &out_buf, &out_size);
    const int convert_errno = errno; // capture before iconv_close() can overwrite it

    // Close unconditionally so the descriptor is released even when the
    // conversion itself failed.
    const int ret = iconv_close(conv_desc);
    const int close_errno = errno;

    if (iconv_value == static_cast<size_t>(-1))
        throw std::runtime_error(std::string("When converting: ") + strerror(convert_errno));
    if (ret != 0)
        throw std::runtime_error(std::string("Could not close iconv: ") + strerror(close_errno));

    // out_size now holds the number of unused bytes; keep only what was written
    // instead of relying on a NUL terminator being present in the buffer.
    utf8.resize(utf8.size() - out_size);
    return utf8;
}


// Demo driver: echoes a sample wide string to the wide standard output, converts
// it with wstring_to_utf8_string(), and writes the converted text to standard error.
int main()
{
    const std::wstring wide_text = L"hello world";
    std::wcout << L"input: [" << wide_text << L"]" << std::endl;

    const std::string utf8_text = wstring_to_utf8_string(wide_text);
    std::cerr << "output: [" << utf8_text << "]" << std::endl;

    return 0;
}
cJ Zougloub
  • 1,484
  • 10
  • 19
  • IMHO many of the objections against `wctombs` don't apply to `std::locale` and co. Using `iconv` is good advice, though. – jpalecek Oct 01 '11 at 21:11