0

I tried to read a UTF-16 file into a wstring using wfstream, but after dumping the memory I see it is not what I want. For example, the string 'Chào' in the UTF-16 file is "FF FE 43 00 68 00 E0 00 6F 00" (viewed in a hex editor). The resulting wstring is:

[0]FF  (BOM)
[1]FE  (BOM)
[2]43
[3]00
[4]68
[5]00
[6]E0
[7]00
[8]6F
[9]00

So with fstream::open, the file is just read byte by byte and each byte is stored as a wchar_t. What I really want is a wstring holding the decoded UTF-16 text, so the wstring should be:

[0]43
[1]68
[2]E0
[3]6F

So how can I read a UTF-16 file, correctly decoded, with wfstream? Thanks for reading :D

user2477
  • 896
  • 2
  • 10
  • 23

2 Answers

-1

Maybe you should try to change the encoding before reading with something like:

// Locale whose character-conversion facet decodes UTF-16 file contents into wchar_t.
// std::consume_header makes the facet read (and drop) the byte-order mark and pick the
// byte order from it; std::little_endian is the fallback when the file has no BOM.
// Without these flags codecvt_utf16 assumes big-endian data and no BOM.
const std::locale AvailLocale
  = std::locale(std::locale("Russian"),
                new std::codecvt_utf16<wchar_t, 0x10ffff,
                    static_cast<std::codecvt_mode>(std::consume_header | std::little_endian)>());

wfstream myfile;
// Building the locale is not enough: it only takes effect once it is imbued on the
// stream, and that must happen before the file is opened. Open the file with
// std::ios::binary so newline translation does not disturb the UTF-16 byte pairs.
myfile.imbue(AvailLocale);
myfile.open(...);

Change the locale name "Russian" to your computer's default language and it should work!

otorrillas
  • 4,357
  • 1
  • 21
  • 34
-1

It's because the BOM has to be written/read in binary mode, whereas the text itself is written/read in text mode.

You can use something like this to close and reopen the file, or else do it manually. Otherwise you might have to use C++11 or WinAPI. The idea is to read/write the BOM in binary mode and then read/write the rest of the file in text mode. It works that way; I've tested it. Otherwise you're going to have to do conversions.

#include <iostream>
#include <vector>
#include <fstream>

/// Thin wrapper around std::basic_fstream that remembers the path it was opened with,
/// so the same file can be closed and reopened under a different open mode.
///
/// @tparam T       Character type of the wrapped stream.
/// @tparam Traits  Character traits of the wrapped stream.
template<typename T, typename Traits = std::char_traits<T>>
class ModFStream
{
    private:
        std::string filepath;                   // path reused by every reopen
        std::basic_fstream<T, Traits> stream;   // the wrapped stream
        std::ios_base::openmode mode;           // mode of the most recent open request

    public:
        /// Creates a wrapper with no path and a closed stream.
        ModFStream() : stream(), mode() {}

        /// Opens FilePath with the given mode and remembers both.
        ModFStream(const std::string &FilePath, std::ios_base::openmode mode) : filepath(FilePath), stream(FilePath, mode), mode(mode) {}

        // No destructor is declared on purpose: the members release themselves, and
        // leaving it implicit keeps the class movable (a user-declared destructor
        // would suppress the implicit move operations).

        /// Direct access to the wrapped stream.
        std::basic_fstream<T, Traits>& get() {return stream;}

        /// Closes the file and reopens the remembered path with the new mode.
        void setmode(std::ios::openmode mode)
        {
            // The parameter shadows the member; record the new mode explicitly so the
            // stored value always reflects how the file is currently opened.
            this->mode = mode;
            stream.close();
            stream.open(filepath, mode);
        }

        /// Forwards formatted output to the wrapped stream.
        template<typename U>
        ModFStream& operator << (const U& other)
        {
            stream << other;
            return *this;
        }

        /// Forwards formatted input to the wrapped stream.
        template<typename U>
        ModFStream& operator >> (U& other)
        {
            stream >> other;
            return *this;
        }
};

// Writes a BOM followed by a short string to a file, then reads the string back and
// prints it, reopening the file between the steps via ModFStream::setmode.
int main()
{
    wchar_t bom[] = L"\xFF\xFE";
    std::wstring str = L"Chào";

    // Create the file and write the byte-order mark in binary mode.
    ModFStream<wchar_t> stream("C:/Users/Brandon/Desktop/UTF16Test.txt", std::ios::out | std::ios::binary);
    stream << bom;

    // setmode() closes and reopens the file. Reopening with plain std::ios::out would
    // truncate it and discard the BOM just written, so reopen in text mode with
    // std::ios::app to append the text after the BOM.
    stream.setmode(std::ios::out | std::ios::app);
    stream << str;

    str.clear();
    stream.setmode(std::ios::in | std::ios::binary);
    stream >> bom[0] >> bom[1];

    // Reopening rewinds to the start of the file, so the two BOM characters have to be
    // skipped again before the text is extracted in text mode.
    stream.setmode(std::ios::in);
    stream.get().ignore(2);
    stream >> str;

    std::wcout<<str;
}

You could also write a WinAPI-based fstream simulator, I guess:

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include <windows.h>

namespace win
{
    // Compile-time predicate: `value` is true exactly when T is wchar_t, false for
    // every other type.
    template<typename T>
    struct is_wide_char : std::is_same<T, wchar_t> {};

    // Bit flags describing how a file should be opened; combine them with the
    // bitwise operators defined below.
    enum class open_mode
    {
        app   = 0x01,
        ate   = 0x02,
        bin   = 0x04,
        in    = 0x08,
        out   = 0x10,
        trunc = 0x20
    };

    // Reference point for a seek request.
    enum class seek_dir
    {
        beg = 0x01,
        cur = 0x02,
        end = 0x04
    };

    // Bitwise operators for open_mode. Each one works on the integer value of the
    // flags and converts the result back to open_mode.
    inline constexpr open_mode operator & (open_mode lhs, open_mode rhs)
    {
        return static_cast<open_mode>(static_cast<int>(lhs) & static_cast<int>(rhs));
    }

    inline constexpr open_mode operator | (open_mode lhs, open_mode rhs)
    {
        return static_cast<open_mode>(static_cast<int>(lhs) | static_cast<int>(rhs));
    }

    inline constexpr open_mode operator ^ (open_mode lhs, open_mode rhs)
    {
        return static_cast<open_mode>(static_cast<int>(lhs) ^ static_cast<int>(rhs));
    }

    inline constexpr open_mode operator~(open_mode value)
    {
        return static_cast<open_mode>(~static_cast<int>(value));
    }

    // Compound forms: update the left operand in place and return a reference to it.
    inline const open_mode& operator |= (open_mode& lhs, open_mode rhs)
    {
        lhs = lhs | rhs;
        return lhs;
    }

    inline const open_mode& operator &= (open_mode& lhs, open_mode rhs)
    {
        lhs = lhs & rhs;
        return lhs;
    }

    inline const open_mode& operator ^= (open_mode& lhs, open_mode rhs)
    {
        lhs = lhs ^ rhs;
        return lhs;
    }

    namespace detail
    {
        // Wide input needs no conversion: copy it (a null pointer yields an empty string).
        inline std::wstring widen(const wchar_t* str)
        {
            return str ? std::wstring(str) : std::wstring();
        }

        // Narrow input is converted with std::mbstowcs, which uses the current C locale.
        inline std::wstring widen(const char* str)
        {
            if (!str)
                return std::wstring();

            // First call only measures; it reports (size_t)-1 for an invalid sequence,
            // which must not be used as a string length.
            const std::size_t length = std::mbstowcs(nullptr, str, 0);
            if (length == static_cast<std::size_t>(-1))
                throw std::invalid_argument("to_wide_string: invalid multibyte sequence");

            std::wstring wide(length, L'\0');
            if (length != 0)
                std::mbstowcs(&wide[0], str, length);
            return wide;
        }
    }

    /// Converts a null-terminated string to std::wstring.
    ///
    /// Dispatches through overloads rather than a run-time `if` on a type trait, so
    /// only the branch that is valid for T is ever compiled (a plain `if` made the
    /// narrow-character instantiation ill-formed).
    ///
    /// @param str  Null-terminated wchar_t or char string; null yields an empty string.
    /// @return     The wide copy or conversion of str.
    /// @throws std::invalid_argument if a narrow string holds an invalid multibyte sequence.
    template<typename T>
    std::wstring to_wide_string(const T* str)
    {
        return detail::widen(str);
    }

    template<typename T>
    class WinFStream
    {
        private:
            open_mode mode;
            HANDLE hFile;
            bool binary_mode = false;

        public:
            WinFStream(const T* FilePath, open_mode mode = open_mode::in | open_mode::out) : mode(mode), hFile(nullptr), binary_mode(false)
            {
                unsigned int open_flags = 0;

                if (static_cast<int>(mode & open_mode::bin))
                {
                    binary_mode = true;
                }

                if (static_cast<int>(mode & open_mode::in))
                {
                    open_flags |= GENERIC_READ;
                }
                else if (static_cast<int>(mode & open_mode::app))
                {
                    open_flags |= FILE_APPEND_DATA;
                }

                if (static_cast<int>(mode & open_mode::out))
                {
                    open_flags |= GENERIC_WRITE;
                }

                std::wstring path = to_wide_string(FilePath);
                hFile = CreateFileW(path.c_str(), open_flags, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);

                if (static_cast<int>(mode & open_mode::ate))
                {
                    SetFilePointer(hFile, 0, nullptr, FILE_END);
                }
            }

            ~WinFStream() {CloseHandle(hFile); hFile = nullptr;}

            inline std::size_t seekg(std::size_t pos, seek_dir from)
            {
                return SetFilePointer(hFile, pos, nullptr, static_cast<int>(from) - 1);
            }

            inline std::size_t tellg()
            {
                return GetFileSize(hFile, nullptr);
            }

            void close()
            {
                CloseHandle(hFile);
                hFile = nullptr;
            }

            template<typename U>
            inline std::size_t write(const U* str, std::size_t size)
            {
                long unsigned int bytes_written = 0;
                WriteFile(hFile, &str[0], size * sizeof(U), &bytes_written, nullptr);
                return bytes_written;
            }

            template<typename U>
            inline std::size_t read(U* str, std::size_t size)
            {
                long unsigned int bytes_read = 0;
                ReadFile(hFile, &str[0], size * sizeof(U), &bytes_read, nullptr);
                return bytes_read;
            }

            template<typename U>
            WinFStream& operator << (const U &other)
            {
                this->write(&other, 1);
                return *this;
            }

            template<typename U, std::size_t size>
            WinFStream& operator << (U (&str)[size])
            {
                this->write(&str[0], size);
                return *this;
            }

            template<typename U, typename Traits = std::char_traits<U>>
            WinFStream& operator << (const std::basic_string<U, Traits>& str)
            {
                this->write(str.c_str(), str.size());
                return *this;
            }

            template<typename U>
            WinFStream& operator >> (U &other)
            {
                this->read(&other, 1);
                return *this;
            }

            template<typename U, std::size_t size>
            WinFStream& operator >> (U (&str)[size])
            {
                this->read(&str[0], size);
                return *this;
            }

            template<typename U, typename Traits = std::char_traits<U>>
            WinFStream& operator >> (std::basic_string<U, Traits>& str)
            {
                unsigned int i = 0;
                std::vector<U> buffer(512, 0);

                while(true)
                {
                    long unsigned int bytes_read = 0;
                    bool result = ReadFile(hFile, &buffer[i], sizeof(U), &bytes_read, nullptr);

                    if (std::isspace(buffer[i]) || buffer[i] == '\r' || buffer[i] == '\n')
                        break;

                    ++i;

                    if (bytes_read != sizeof(U) || !result)
                        break;
                }

                str.append(buffer.begin(), buffer.begin() + i);
                return *this;
            }
    };

    // Convenience aliases for streams constructed from wide and narrow paths.
    using WinFStreamW = WinFStream<wchar_t>;
    using WinFStreamA = WinFStream<char>;

}


using namespace win;

int main()
{
    unsigned char bom[2] = {0XFF, 0xFE};
    std::wstring str = L"Chào";

    WinFStreamW File(L"C:/Users/Brandon/Desktop/UTF16Test.txt");
    File << bom;
    File << str;


    File.seekg(0, win::seek_dir::beg);

    std::wstring str2;
    File>>bom;
    File>>str2;

    std::wcout<<str2;
}

I know, it's dirty and doesn't work the exact same as fstream but it was worth my time "trying" to simulate it..

But again, my operator << and >> aren't "equivalent" to std::fstream's..

You're probably better off just using CreateFileW, ReadFile, and WriteFile directly, or re-opening the file in text mode after writing the BOM in binary mode.

Brandon
  • 22,723
  • 11
  • 93
  • 186
  • According to this http://stackoverflow.com/a/10509465/886887 if you try to read UTF-16 text in text mode you'll get messed up by the character translations. Certainly I can't see any reason why binary mode wouldn't work; you are, after all, wanting to read the bytes in verbatim which is what binary mode does. – Harry Johnston Apr 14 '14 at 01:48