I am getting a strange error when I try to load my .mat file, which is available at this link: https://drive.google.com/drive/folders/19GmXlWGh4-u_GxntNvmlC1YD6XiIaXqn?usp=sharing. My .mat file has 40000 columns and 764 rows, and it contains random values between 0 and 255. Every time I run my code on this data set, it raises a memory error. Can anyone guide me on how to deal with this error? My computer has 16 GB of RAM, a 2.1 GHz processor, and a 500 GB SSD. Below is my code, which I took from the ASU scikit-feature repository that contains all the feature selection methods: https://github.com/jundongl/scikit-feature/blob/master/skfeature/example/test_trace_ratio.py
import scipy.io
import scipy.io as sio
import os
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn import svm
from sklearn.metrics import accuracy_score
import trace_ratio
from sklearn.model_selection import KFold
from sklearn import model_selection
import scipy.io
import scipy.io
import os
import scipy.io as sio
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedKFold
# pip install skfeature-chappers
# from skfeature.function.similarity_based import fisher_score
# from skfeature.function.similarity_based import trace_ratio
def main():
    """Evaluate trace-ratio feature selection with a cross-validated RBF SVM.

    Loads ``200by200.mat`` from the current directory, reads the data matrix
    from key ``'X'`` and the labels from the first column of key ``'Y'``,
    then runs K-fold cross-validation. In every fold the features are ranked
    with ``trace_ratio.trace_ratio`` (``style='fisher'``) on the training
    rows only, an RBF-kernel SVC is fitted on the selected columns, and the
    accuracy on the held-out rows is recorded. The mean accuracy over all
    folds is printed.
    """
    source_dir = './'
    n_splits = 5   # number of cross-validation folds
    num_fea = 50   # number of selected features

    # load data
    mat = sio.loadmat(os.path.join(source_dir, '200by200.mat'))
    X = mat['X'].astype(float)  # data
    y = mat['Y'][:, 0]          # label

    # NOTE(review): the reported MemoryError presumably originates inside
    # trace_ratio rather than here. With ~40000 features, any dense
    # n_features x n_features intermediate would need about 12.8 GB as
    # float64 -- confirm against the trace_ratio implementation and consider
    # reducing the number of features before calling it.

    # NOTE(review): KFold is used without shuffling, so folds follow the row
    # order of the file. If the labels are stored grouped by class, the
    # already-imported StratifiedKFold may be the safer choice -- verify.
    ss = KFold(n_splits=n_splits)

    # perform evaluation on classification task
    clf = svm.SVC(kernel='rbf')

    fold_accuracies = []
    for train, test in ss.split(X, y):
        # obtain the index of selected features; the two score outputs
        # returned alongside it are not needed here
        idx, _, _ = trace_ratio.trace_ratio(
            X[train], y[train], num_fea, style='fisher')

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the
        # training rows, then predict the class labels of the test rows
        clf.fit(selected_features[train], y[train])
        y_predict = clf.predict(selected_features[test])

        # record the classification accuracy on the test rows
        fold_accuracies.append(accuracy_score(y[test], y_predict))

    # output the average classification accuracy over all folds; the divisor
    # is tied to n_splits so it cannot drift from the splitter configuration
    print('Accuracy:', float(sum(fold_accuracies)) / n_splits)
# Run the evaluation only when this file is executed as a script.
if __name__ == '__main__':
    main()