2

The tofile() method of numpy array was used to write a unsigned 32bit integer array as a binary file (test.bin). Then, c++ was used to read uint32_t from the file. The problem is the c++ program didn't read the correct value. How to fix that?

Hex editor shows the test.bin is correct because the value increases one by one, as expected.

fileio.py:

import numpy as np
x = np.arange(20)+65536
y = x.astype(np.uint32)
print(y)
with open("test.bin", "wb+") as f:
    y.tofile(f)

output of fileio.py:

[65536 65537 65538 65539 65540 65541 65542 65543 65544 65545 65546 65547
 65548 65549 65550 65551 65552 65553 65554 65555]

test.bin: (in hex editor. vim with %!xxd command)

0000000: 0000 0100 0100 0100 0200 0100 0300 0100  ................
0000010: 0400 0100 0500 0100 0600 0100 0700 0100  ................
0000020: 0800 0100 0900 0100 0a00 0100 0b00 0100  ................
0000030: 0c00 0100 0d00 0100 0e00 0100 0f00 0100  ................
0000040: 1000 0100 1100 0100 1200 0100 1300 0100  ................
0000050: 0a                                       .   

fileio.cpp:

#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <byteswap.h>

using namespace std;

int main(void){
    vector<char> arr;
    char n;
    int i;
    int num_char=0;
    //ifstream f("test.bin");
    fstream f;
    f.open("test.bin", ios::in | ios::binary);

    while(f>>n){
        cout << int(n) << ' ';
        arr.push_back(n);
        num_char++;
    }
    cout << endl;
    cout << "number of characters: " << num_char << endl;
    size_t uint32t_size = arr.size() * sizeof(char) / sizeof(uint32_t);
    uint32_t* ptr_arr0 = (uint32_t*)arr.data();

    cout << "uint32_t size: " << uint32t_size << endl;
    cout << "uint32_t interpretatin:" << endl;
    for (i = 0; i < uint32t_size; i++){
        cout << ptr_arr0[i] << ' ';
    }
    cout << endl;

    cout << "32bit byteswap: " << endl;
    for (i=0; i < uint32t_size; i++){
        ptr_arr0[i] = bswap_32(ptr_arr0[i]);
        cout << ptr_arr0[i] << ' ';
    }
    cout << endl;
    cout << "characters after byte swap: " << endl;
    for (i=0; i < num_char; i++){
        cout << int(arr[i]) << ' ';
    }
    cout << endl;

}   

output of fileio.cpp:

0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 0 4 0 1 0 5 0 1 0 6 0 1 0 7 0 1 0 8 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 14 0 1 0 15 0 1 0 16 0 1 0 17 0 1 0 18 0 1 0 19 0 1 0 
number of characters: 75
uint32_t size: 18
uint32_t interpretatin:
65536 65537 65538 65539 65540 65541 65542 65543 65544 256 16777217 65536 234881280 251658496 268435712 285212928 301990144 318767360 
32bit byteswap: 
256 16777472 33554688 50331904 67109120 83886336 100663552 117440768 134217984 65536 16777217 256 65550 65551 65552 65553 65554 65555 
characters after byte swap: 
0 1 0 0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 0 4 0 1 0 5 0 1 0 6 0 1 0 7 0 1 0 8 0 0 1 0 1 0 0 1 0 1 0 0 14 0 1 0 15 0 1 0 16 0 1 0 17 0 1 0 18 0 1 0 19 0 1 0 0 1 0 

It seems that byte swapping is not the only problem. The c++ program didn't even read all the 8bit integers correctly, as in:

... 1 0 8 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 14 ...

There is no number 9,10,11,12,13, despite these numbers exist in hex editor view of test.bin.

Possibly related questions:

Update:

readbinary.cpp It is from reading the binary file into the vector of unsigned chars

#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <byteswap.h>
#include <iterator>
using namespace std;

typedef unsigned char BYTE;
std::vector<BYTE> readFile(const char* filename)
{
    // open the file:
    std::ifstream file(filename, std::ios::binary);

    // Stop eating new lines in binary mode!!!
    file.unsetf(std::ios::skipws);

    // get its size:
    std::streampos fileSize;

    file.seekg(0, std::ios::end);
    fileSize = file.tellg();
    file.seekg(0, std::ios::beg);

    // reserve capacity
    std::vector<BYTE> vec;
    vec.reserve(fileSize);

    // read the data:
    vec.insert(vec.begin(),
               std::istream_iterator<BYTE>(file),
               std::istream_iterator<BYTE>());

    return vec;
}


int main(void){
    std::vector<BYTE>  arr = readFile("test.bin");
    int i;
    for (i=0;i<arr.size();i++){
        cout << int(arr[i]) << ' ';
    }
} 

output of readbinary.cpp (it has 9,10,11,12).

0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 0 4 0 1 0 5 0 1 0 6 0 1 0 7 0 1 0 8 0 1 0 9 0 1 0 10 0 1 0 11 0 1 0 12 0 1 0 13 0 1 0 14 0 1 0 15 0 1 0 16 0 1 0 17 0 1 0 18 0 1 0 19 0 1 0
Community
  • 1
  • 1
rxu
  • 1,369
  • 1
  • 11
  • 29

0 Answers0