0

sorry this may be somewhat duplication, but i am not able to fix it. i am involved with handwritten OCR application. I use MNIST digit database for training process here. I use following codehere for read pixels from the database and re-create the image. programs doesnt give any error but it gives meaningless image(totally black and unclear pixel patterns) as output. can someone explain the reason for that? plz help

here is my code

int reverseInt(int i) {
unsigned char c1, c2, c3, c4;
c1 = i & 255;
c2 = (i >> 8) & 255;
c3 = (i >> 16) & 255;
c4 = (i >> 24) & 255;
return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
}

void create_image(CvSize size, int channels, unsigned char* data[28][28], int imagenumber) {
string imgname; ostringstream imgstrm;string fullpath;
imgstrm << imagenumber;
imgname=imgstrm.str();
fullpath="D:\\"+imgname+".jpg";

IplImage *imghead=cvCreateImageHeader(size, IPL_DEPTH_16S, channels);
imghead->imageData=(char *)data;
cvSaveImage(fullpath.c_str(),imghead);  
}
int main(){
ifstream file ("D:\\train-images.idx3-ubyte",ios::binary);
if (file.is_open())
{
    int magic_number=0; int number_of_images=0;int r; int c;
    int n_rows=0; int n_cols=0;CvSize size;unsigned char temp=0;

    file.read((char*)&magic_number,sizeof(magic_number)); 
    magic_number= reverseInt(magic_number);

    file.read((char*)&number_of_images,sizeof(number_of_images));
    number_of_images= reverseInt(number_of_images);

    file.read((char*)&n_rows,sizeof(n_rows));
    n_rows= reverseInt(n_rows);
    file.read((char*)&n_cols,sizeof(n_cols));
    n_cols= reverseInt(n_cols);
    unsigned char *arr[28][28];


    for(int i=0;i<number_of_images;++i)
    {
        for(r=0;r<n_rows;++r)
        {
            for(c=0;c<n_cols;++c)
            {                 
                file.read((char*)&temp,sizeof(temp));
                arr[r][c]= &temp;
            }           
        }
        size.height=r;size.width=c;
        create_image(size,1, arr, i);
    }
}
return 0;
}
Community
  • 1
  • 1
Heshan Sandeepa
  • 3,388
  • 2
  • 35
  • 45

2 Answers2

1

You have:

unsigned char temp=0;
...
file.read((char*)&temp,sizeof(temp));

With that you are reading a byte into a single char, and overwriting it with each subsequent byte in the file. When you do this:

create_image(size,3, &temp, i);

temp is only one character long and just contains the last byte in the file, so your image ends up being just whatever happens to be in memeory after temp. You need to allocate an array to hold the image data and increment a pointer into it as you fill it with data.

Also you are creating a 3 channel image, but the MNIST data is only single channel, right?

Also,

imghead->imageData=(char *)data;

should be

cvSetData(imghead, data, size.width)

and

unsigned char *arr[28][28];

should be

unsigned char arr[28][28];
Bull
  • 11,771
  • 9
  • 42
  • 53
1

I also wanted to use MNIST with OpenCV and this question was the closest i got.

I thought I post a "copy&paste->be happy" version based on cv::Mat instead of iplimage, since this is easier to work with. Also, cv::Mat is preferred since OpenCV 2.x. This method get you a vector of pairs of cv::Mat images and labels as ints. Have fun.

std::vector<std::pair<cv::Mat,int>> loadBinary(const std::string &datapath, const std::string &labelpath){
    std::vector<std::pair<cv::Mat,int>> dataset;
    std::ifstream datas(datapath,std::ios::binary);
    std::ifstream labels(labelpath,std::ios::binary);

    if (!datas.is_open() || !labels.is_open())
        throw std::runtime_error("binary files could not be loaded");

    int magic_number=0; int number_of_images=0;int r; int c;
    int n_rows=0; int n_cols=0; unsigned char temp=0;

    // parse data header
    datas.read((char*)&magic_number,sizeof(magic_number));
    magic_number=reverseInt(magic_number);
    datas.read((char*)&number_of_images,sizeof(number_of_images));
    number_of_images=reverseInt(number_of_images);
    datas.read((char*)&n_rows,sizeof(n_rows));
    n_rows=reverseInt(n_rows);
    datas.read((char*)&n_cols,sizeof(n_cols));
    n_cols=reverseInt(n_cols);

    // parse label header - ignore
    int dummy;
    labels.read((char*)&dummy,sizeof(dummy));
    labels.read((char*)&dummy,sizeof(dummy));

    for(int i=0;i<number_of_images;++i){
        cv::Mat img(n_rows,n_cols,CV_32FC1);

        for(r=0;r<n_rows;++r){
            for(c=0;c<n_cols;++c){
                datas.read((char*)&temp,sizeof(temp));
                img.at<float>(r,c) = 1.0-((float)temp)/255.0; // inverse 0.255 values
            }
        }
        labels.read((char*)&temp,sizeof(temp));
        dataset.push_back(std::make_pair(img,(int)temp));
    }
    return dataset;
}

just the same as above:

int reverseInt(int i) {
    unsigned char c1, c2, c3, c4;
    c1 = i & 255; c2 = (i >> 8) & 255; c3 = (i >> 16) & 255; c4 = (i >> 24) & 255;
    return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
}
Insa
  • 1,610
  • 12
  • 17