I'm trying to do a multiclass classification project on an imbalanced COVID-19 dataset, using Jupyter notebooks on Kaggle. My code is:
import os
import random
import gc
import numpy as np
from numpy import asarray
import itertools
import cv2
train_dir='/kaggle/input/pandemic2/Training/Training'
test_dir='/kaggle/input/pandemic2/Testing/Testing'
train_covid = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir) if 'COVID' in i]
train_normal = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir) if 'Normal' in i]
# all pneumonia subtypes are grouped into one class; note that a bare chain of
# `or`s ('MERS' or 'SARS' or ...) is always truthy and would match every file
pneumonia_keywords = ['MERS', 'SARS', 'Bacterial', 'Chlamydia', 'Influenza', 'Klebsiella',
                      'Legionella', 'Mycoplasma', 'Pneumocystis', 'Streptococcus', 'Varicella']
train_pneumonia = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir)
                   if any(k in i for k in pneumonia_keywords)]
test_imgs=['/kaggle/input/pandemic2/Testing/Testing/{}'.format(i) for i in os.listdir(test_dir)]
train_imgs = train_covid + train_normal + train_pneumonia
random.shuffle(train_imgs)
del train_covid
del train_normal
del train_pneumonia
gc.collect()
nrows=150
ncolumns=150
channels= 3
def read_and_process_image(list_of_images):
    x = []  # resized images
    y = []  # integer labels: 0 = Normal, 1 = COVID, 2 = Pneumonia
    for image in list_of_images:
        x.append(cv2.resize(cv2.imread(image, cv2.IMREAD_COLOR), (nrows, ncolumns), interpolation=cv2.INTER_CUBIC))
        if 'Normal' in image:
            y.append(0)
        elif 'COVID' in image:
            y.append(1)
        else:
            y.append(2)
    return x, y
x,y= read_and_process_image(train_imgs)
del train_imgs
gc.collect()
x=np.array(x)
print(x.shape)
y=np.array(y)
print(y.shape)
import sklearn
from keras import layers
from keras import models
from keras import optimizers
from sklearn.model_selection import StratifiedKFold
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import cross_val_score
from sklearn.metrics import balanced_accuracy
from keras.applications import InceptionResNetV2
conv_base= InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(150,150,3))
model=models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=1e-4), metrics=['categorical_accuracy'])
train_datagen=ImageDataGenerator(rescale=1./255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True )
val_datagen=ImageDataGenerator(rescale=1./255)
skf = StratifiedKFold(n_splits=5, shuffle=True,random_state=1)
# cross_val_score expects a scikit-learn estimator, so it can't take the Keras
# model directly; the StratifiedKFold loop below does the cross-validation
# manually instead of:
# scores = cross_val_score(model, x, y, cv=5, scoring="balanced_accuracy")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x,y, stratify=y, test_size=0.20, random_state=2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, stratify=y_train, train_size=0.8, random_state=2)
from keras.utils import to_categorical
y_train=to_categorical(y_train,3)
print('Shape of training labels is:', y_train.shape)
y_val=to_categorical(y_val,3)
print('Shape of validation labels is:', y_val.shape)
y_test=to_categorical(y_test,3)
print('Shape of test labels is:', y_test.shape)
for index, (train_indices, val_indices) in enumerate(skf.split(x, y)):
    print("Training on fold " + str(index + 1) + "/" + str(skf.n_splits))
    # generate this fold's arrays from the indices
    xtrain, xval = x[train_indices], x[val_indices]
    ytrain, yval = y[train_indices], y[val_indices]
    ntrain = len(xtrain)
    nval = len(xval)
    batch_size = 32
    # one-hot encode the fold labels for categorical_crossentropy
    train_generator = train_datagen.flow(xtrain, to_categorical(ytrain, 3), batch_size=batch_size)
    val_generator = val_datagen.flow(xval, to_categorical(yval, 3), batch_size=batch_size)
    print("Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may take a while...")
    # note: the same model object is reused across folds, so weights carry over
    history = model.fit(train_generator,
                        steps_per_epoch=ntrain // batch_size,
                        epochs=80,
                        validation_data=val_generator,
                        validation_steps=nval // batch_size,
                        verbose=2)
    # the balanced accuracy I want needs the (failing) sklearn import above;
    # predictions use the same 1/255 rescaling the generators apply
    val_preds = model.predict(xval / 255.0).argmax(axis=1)
    print('\nBalanced Accuracy:', balanced_accuracy(yval, val_preds) * 100, '%')
When I run the code, it gives me the following error:
Traceback (most recent call last)
<ipython-input-7-45c4c9070141> in <module>
6 from keras.preprocessing.image import img_to_array, load_img
7 from sklearn.model_selection import cross_val_score
----> 8 from sklearn.metrics import balanced_accuracy
9
10 #Download the model
ImportError: cannot import name 'balanced_accuracy' from 'sklearn.metrics' (/opt/conda/lib/python3.7/site-packages/sklearn/metrics/__init__.py)
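From the scikit-learn docs it looks like the metric is actually exposed as balanced_accuracy_score (added in scikit-learn 0.20) rather than balanced_accuracy, so part of my confusion is whether my environment even has a recent enough scikit-learn. A minimal check of the import and the call signature:

from sklearn.metrics import balanced_accuracy_score
# toy labels just to confirm the import works; the score is the
# recall averaged over the three classes
y_true = [0, 1, 2, 2]
y_pred = [0, 1, 2, 1]
print(balanced_accuracy_score(y_true, y_pred))  # 0.8333...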
I've tried so many solutions like 1, 2, 3 and 4, but the last one led me to another complicated issue, because whenever I run the command conda activate myenv it gives me the error:
CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
To initialize your shell, run
$ conda init <SHELL_NAME>
Currently supported shells are:
- bash
- fish
- tcsh
- xonsh
- zsh
- powershell
See 'conda init --help' for more information and options.
IMPORTANT: You may need to close and restart your shell after running 'conda init'.
Tackling this issue by understanding it thoroughly and trying what is provided in this thread led me to the following error message:
/bin/bash: -c: line 0: syntax error near unexpected token `newline' /bin/bash: -c: line 0: `/opt/conda/bin/conda init <bash>'.
I suspect the literal angle brackets are part of the problem, since <SHELL_NAME> looks like a placeholder (presumably the command should just be conda init bash), but I have tried these solutions, 1 and 2, and still had no luck!
Then, when I got stuck and felt trapped, I tried to follow the official conda documentation to create a virtual environment with all the needed packages (along the lines of conda create -n myenv followed by conda activate myenv), but I ran into the same error as above, stating that my shell is not properly configured to activate the new virtualenv.
Again, I went back to the simple solutions and tried reverting my base environment to its first revision:

conda list --revisions
conda install --revision=0

Then I updated it, but the error persists and still prevents me from using balanced_accuracy and other useful metrics.
I also tried creating a new Jupyter notebook and starting from scratch by updating my packages, but they were already at the most recent versions!
I believe I'm running the configuration commands in the wrong order, because my Jupyter kernel and environment are a complete mess now.
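For reference, this is the minimal in-notebook sanity check I can think of for which interpreter and scikit-learn version the kernel is actually using (assuming the kernel starts at all):

import sys
import sklearn
# which Python binary the kernel runs on, and whether scikit-learn is
# recent enough (balanced_accuracy_score was added in 0.20)
print(sys.executable)
print(sklearn.__version__)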
It would be highly appreciated if someone could guide me to best practices for setting up environments for a deep learning task.
BTW: the solutions that suggest manipulating the .bashrc file are a bit confusing to me, and I don't fully understand how that works.