11

I have converted my image into a CSV file, but it comes out as a matrix and I want it to be a single row. How can I convert all of the images in the dataset into one CSV file, with each image on its own line?

Here's the code I've used:

from PIL import Image
import numpy as np
import os, os.path, time

format='.jpg'
myDir = "Lotus1"
def createFileList(myDir, format='.jpg'):
    """Collect the paths of all files under ``myDir`` with a given suffix.

    The directory tree is walked recursively with ``os.walk`` and every
    file whose name ends with ``format`` is recorded.

    Args:
        myDir: Root directory to search.
        format: File-name suffix to match (default ``'.jpg'``).

    Returns:
        A list of matching paths, each joined onto the directory it was
        found in. The list is empty when nothing matches.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    # Return only after the whole tree has been walked, so that every
    # match is included rather than just the first one.
    return fileList

# Gather the image paths; fall back to an empty list if the helper
# returns nothing, so the loop below is simply skipped.
fileList = createFileList(myDir) or []

# Open the output once so every image adds a row instead of each
# np.savetxt call overwriting the file. Binary mode is accepted by
# np.savetxt for an already-open handle.
with open("img_pixels.csv", "wb") as csvFile:
    for fileName in fileList:
        # fileList holds path strings, so each image has to be opened
        # before its pixels can be read.
        with Image.open(fileName) as img:
            # Make image Greyscale
            img_grey = img.convert('L')
        # Save Greyscale values: reshape to (1, width * height) so that
        # np.savetxt writes the whole image as a single CSV line.
        value = np.asarray(img_grey, dtype=np.float64).reshape(1, -1)
        np.savetxt(csvFile, value, delimiter=',')

input : http://uupload.ir/files/pto0_lotus1_1.jpg

output:http://uupload.ir/files/huwh_output.png

Nebula
  • 159
  • 2
  • 2
  • 16

5 Answers5

20

From your question, I think you want to know about NumPy's ndarray.flatten() method. You want to add

value = value.flatten()

right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.

The rest of your question is unclear, but it implies you have a directory full of JPEG images and you want a way to read through them all. So first, get a file list:

def createFileList(myDir, format='.jpg'):
    """Return the paths of every file under ``myDir`` ending in ``format``.

    Args:
        myDir: Directory to search; sub-directories are included because
            the tree is traversed with ``os.walk``.
        format: File-name suffix to keep (default ``'.jpg'``).

    Returns:
        A list of paths, each built from the directory the file was found
        in plus the file name. Empty when no file matches.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    return fileList

Then surround your code with a `for fileName in fileList:` loop.

Edited to add a complete example. Note that I've used csv.writer and changed your float64 values to ints (which should be OK, as pixel data is 0-255).

from PIL import Image
import numpy as np
import sys
import os
import csv

#Useful function
def createFileList(myDir, format='.jpg'):
    """List all files below ``myDir`` whose names end with ``format``.

    Args:
        myDir: Top-level directory; it is walked recursively.
        format: Suffix a file name must end with to be included
            (default ``'.jpg'``).

    Returns:
        A list of file paths (directory joined with file name). The list
        is empty if no file has the requested suffix.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    return fileList

# Build the list of image paths to convert.
myFileList = createFileList('path/to/directory/')

# Open the CSV once in append mode. newline='' is what the csv module
# expects; without it, extra blank rows appear on Windows.
with open("img_pixels.csv", 'a', newline='') as f:
    writer = csv.writer(f)

    for file in myFileList:
        print(file)

        # Make image Greyscale; the context manager closes the source
        # file once the converted copy has been produced.
        with Image.open(file) as img_file:
            img_grey = img_file.convert('L')

        # Save Greyscale values as one flat row of integers. The builtin
        # int is used because the np.int alias no longer exists in
        # current NumPy releases.
        value = np.asarray(img_grey, dtype=int).flatten()
        print(value)
        writer.writerow(value)
Pam
  • 1,146
  • 1
  • 14
  • 18
  • Hi @Pam, I have 285 images and I want to convert it to a csv file, and I want to have each image to be one row of csv file. In a nutsell I want to convert the images to feature vector for other tasks. – Nebula Mar 02 '18 at 14:32
  • My answer should give you all the code you need. I've assumed all your images have the same height and width, though. If not, you might want to add "width, height" on the start of each line in the csv file. – Pam Mar 02 '18 at 14:50
  • Dear @Pam, I dont have much experience in python. I've edited my code. Is it possible for you to check my code again? – Nebula Mar 02 '18 at 16:30
  • @Zeinab How will you know the images' dimensions if they're saved as one-dimensional arrays though? you'd need to add some kind of header. – Nyerguds Mar 03 '18 at 19:13
  • Also, I'd rather save as hex values; that'll make them more uniform and compact. – Nyerguds Mar 03 '18 at 19:15
  • You could use bytearray to convert to bytes (which is useful for ACTUAL images) but when the pixels are in a .csv like this, it’s a moot point. – Pam Mar 03 '18 at 19:30
  • Ok @Pam the code worked well, but the number of columns in csv file are too exceeded, so I want to remove zero columns before writting in csv file. (I've put an instance of input in the question) – Nebula Mar 05 '18 at 10:11
  • 1
    This isn't a good idea - you want to remove black pixels with no way of putting them back in? The images would be completely destroyed. How big are your images? The clever thing to do would be to resize them. The number of columns is (width x height), from that, you can work out how small your images need to be. – Pam Mar 05 '18 at 10:47
  • Also, what is the purpose of the csv file? What will you do with the data. – Pam Mar 05 '18 at 10:48
  • @Pam It's a letter recognition task and there are 284 images, and 19 classes. I wanna apply naive bayesian. First I have to convert each image to feature vector (this csv file) and for reducing extra info I should use some feature selection code like removing zero columns and the pixels that are black. but I dont how to do this:\ BTW the images are not the same size. Here's an instance: http://uupload.ir/files/pto0_lotus1_1.jpg – Nebula Mar 05 '18 at 11:25
  • Ah, ok, got you. You want to *crop* the images so that only the interesting parts remain (remove any extra black borders). Do this before you apply "flatten". First scan through the numpy array (or PIL image data) and figure out the borders you want to crop. Then you can use PIL crop to crop the image. THEN (if I were you) I'd resize all the images to be the same size. But if you don't know how to do it, it might be worth starting a new question. – Pam Mar 05 '18 at 11:53
  • Ok @Pam , since I'm not much experienced in python I'll ask a new question and put my latest code and tell me how to do this crop stuff please:) – Nebula Mar 05 '18 at 12:11
  • It might not be me that answers it - you might get a better answer more quickly from someone else! – Pam Mar 05 '18 at 12:26
  • @Pam, Yeah I know that :D – Nebula Mar 05 '18 at 12:32
  • @Pam Would you mind answering my question? https://stackoverflow.com/questions/49110560/feature-selection-using-python – Nebula Mar 06 '18 at 09:55
  • @Pam Would you mind editing your code so that one who uses it doesn't do it every single time? ex: `fileList` isn't recognized at line `for file in fileList:`Just saying. – bit_scientist Jan 30 '20 at 01:56
6

How about you convert your images to 2D numpy arrays and then write them as txt files with .csv extensions and , as delimiters?

Maybe you could use a code like following:

# Write the array `image` to 'np.csv' as comma-separated text.
# NOTE(review): `image` is not defined in this snippet - per the text
# above it is presumably the 2D array of one image; confirm before use.
np.savetxt('np.csv', image, delimiter=',')
Bedir Yilmaz
  • 3,823
  • 5
  • 34
  • 54
6
import numpy as np
import cv2
import os

# Directory holding the images to convert.
IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'

# Open the output once in binary append mode; every image adds one row.
with open('output.csv', 'ab') as f:
    # Sort the listing so the row order is reproducible between runs.
    for img in sorted(os.listdir(IMG_DIR)):
        img_array = cv2.imread(os.path.join(IMG_DIR, img), cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for entries it cannot decode (e.g.
        # sub-directories or non-image files); skip them instead of crashing.
        if img_array is None:
            continue

        # One image -> one row of shape (1, height * width).
        img_array = img_array.reshape(1, -1)

        print(img_array)

        # fmt="%d" writes pixels as plain integers rather than the default
        # scientific-notation floats.
        np.savetxt(f, img_array, delimiter=",", fmt="%d")
brooksrelyt
  • 3,925
  • 5
  • 31
  • 54
k.dhakal
  • 69
  • 1
  • 2
3
import os
import pandas as pd

# Root folder containing one sub-folder per class. It is made absolute
# first, because after os.chdir() a relative path would no longer point
# at this folder.
path = os.path.abspath('path-to-the-folder')
os.chdir(path)
lists = os.listdir(path)
labels = []
file_lst = []

for folder in lists:
    folder_path = os.path.join(path, folder)
    # Only sub-directories are class folders; a stray file at the top
    # level would make os.listdir() raise.
    if not os.path.isdir(folder_path):
        continue
    for file in os.listdir(folder_path):
        file_lst.append(os.path.join(folder_path, file))
        # The folder name doubles as the class label.
        labels.append(folder)

dictP_n = {"path": file_lst,
           "label_name": labels,
           "label": labels}

data = pd.DataFrame(dictP_n, index=None)
# Shuffle the rows.
data = data.sample(frac=1)
# Map the class-folder names to numeric labels.
data['label'] = data['label'].replace({"class1": 0, "class2": 1})
data.to_csv("path-to-save-location//file_name.csv", index=False)
-1
from logging import root

from PIL import Image
import numpy as np
import sys
import os
import csv

def createfilelist(myDir, format='.jpg'):
    """Return the paths of all files under ``myDir`` ending in ``format``.

    Args:
        myDir: Root directory; it is traversed recursively with ``os.walk``.
        format: File-name suffix to match (default ``'.jpg'``).

    Returns:
        A list of paths to the matching files, empty if none are found.
    """
    fileList = []
    print(myDir)
    for roots, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                # Join with the directory currently being walked, not the
                # top-level one, so files in sub-folders get valid paths.
                fullname = os.path.join(roots, name)
                fileList.append(fullname)

    return fileList

# Build the list of image paths to convert.
myFileList = createfilelist('C:/Users/Rahul/Desktop/CASIA2/Au')

# Open the CSV once in append mode. newline='' is what the csv module
# expects; without it, extra blank rows appear on Windows.
with open("image_to_csv.csv", 'a', newline='') as f:
    writer = csv.writer(f)

    for file in myFileList:
        print(file)

        # Convert to greyscale; the context manager closes the source
        # file once the converted copy has been produced.
        with Image.open(file) as img_file:
            img_grey = img_file.convert('L')

        # Flatten the pixels into one row of integers. The builtin int is
        # used because the np.int alias no longer exists in current NumPy
        # releases.
        value = np.asarray(img_grey, dtype=int).flatten()
        print(value)

        writer.writerow(value)
ChrisGPT was on strike
  • 127,765
  • 105
  • 273
  • 257