I have been trying to use sklearn's GridSearchCV in the following script multiple types (once for every logistic regression, 3 times total). What ends up happening is that the first GridSearch for the first logistic regression is completed, and when the second gridsearch is about to begin, the terminal just hangs and nothing happens.
I am using Keras for logistic regression
I would love some feedback, as this problem is kind of annoying.
PS. This is my first time posting, so i am happy to provide additional information if needed.
Here's the script:
def braf():
mut_pred=mutation_prediction(X_train_all_genes, Y_train_all_genes, X_valid=X_test_all_genes, Y_valid=Y_test_all_genes)
print('Starting BRAF...')
BRAF_history= History()
braf_estimator = KerasClassifier(build_fn=mut_pred.braf_model, epochs=30, batch_size=15, verbose=0)
braf_param_grid = dict(braf_learning_rate =
list(np.linspace(0,0.0001, num=5)), braf_lasso_rate =
list(np.linspace(0,0.0001, num=5)))
braf_grid = GridSearchCV(estimator=braf_estimator, cv=2,
param_grid=braf_param_grid, n_jobs=30,pre_dispatch=5)
braf_grid_result = braf_grid.fit(X_train_all_genes.values, Y_train_all_genes['BRAF_mutant'].values,callbacks=[BRAF_history])
print('Done with BRAF')
plot_loss(BRAF_history.history['loss'], title='BRAF LOSS')
plot_accuracy(BRAF_history.history['acc'], title='BRAF Accuracy')
BRAF_pred=list(map(lambda x:int(x),braf_grid.predict(X_test_all_genes.values)))
return BRAF_pred
def kras():
mut_pred=mutation_prediction(X_train_all_genes, Y_train_all_genes, X_valid=X_test_all_genes, Y_valid=Y_test_all_genes)
print('Starting KRAS...')
KRAS_history= History()
kras_estimator = KerasClassifier(build_fn=mut_pred.kras_model, epochs=30, batch_size=15, verbose=0)
kras_param_grid = dict(kras_learning_rate =
list(np.linspace(0,0.0001, num=10)), kras_lasso_rate =
list(np.linspace(0,0.0001, num=10)))
kras_grid = GridSearchCV(estimator=kras_estimator, cv=2, param_grid=kras_param_grid, n_jobs=30,pre_dispatch=5)
kras_grid_result = kras_grid.fit(X_train_all_genes.values, Y_train_all_genes['KRAS_mutant'].values,callbacks=[KRAS_history])
print('Done with KRAS')
plot_loss(KRAS_history.history['loss'], title='KRAS LOSS')
plot_accuracy(KRAS_history.history['acc'], title='KRAS Accuracy')
KRAS_pred=list(map(lambda x:int(x),kras_grid.predict(X_test_all_genes.values)))
return KRAS_pred
def tp53():
mut_pred=mutation_prediction(X_train_all_genes, Y_train_all_genes, X_valid=X_test_all_genes, Y_valid=Y_test_all_genes)
print('Starting TP53...')
TP53_history= History()
tp53_estimator = KerasClassifier(build_fn=mut_pred.tp53_model, epochs=30, batch_size=15, verbose=0)
tp53_param_grid = dict(tp53_learning_rate =
list(np.linspace(0,0.001, num=10)), tp53_lasso_rate =
list(np.linspace(0,0.0001, num=10)))
tp53_grid = GridSearchCV(estimator=tp53_estimator, cv=2, param_grid=tp53_param_grid, jobs=30,pre_dispatch=5)
tp53_grid_result = tp53_grid.fit(X_train_all_genes.values, Y_train_all_genes['TP53_mutant'].values,callbacks=[TP53_history])
print('Done with TP53')
plot_loss(TP53_history.history['loss'], title='TP53 LOSS')
plot_accuracy(TP53_history.history['acc'], title='TP53 Accuracy')
TP53_pred=list(map(lambda
x:int(x),tp53_grid.predict(X_test_all_genes.values)))
In my main()
i call the above functions to do LR on these three genes and return the prediction with the best found combination of learning rate and lasso variable.
Any feedback would be helpful
UPDATE When i interrupt the process i get the following:
Process ForkPoolWorker-58:
Process ForkPoolWorker-42:
Process ForkPoolWorker-56:
Process ForkPoolWorker-54:
Process ForkPoolWorker-52:
Process ForkPoolWorker-46:
Process ForkPoolWorker-40:
Process ForkPoolWorker-44:
Process ForkPoolWorker-38:
Process ForkPoolWorker-36:
Process ForkPoolWorker-60:
Process ForkPoolWorker-59:
Process ForkPoolWorker-43:
Process ForkPoolWorker-37:
Process ForkPoolWorker-39:
Process ForkPoolWorker-41:
Process ForkPoolWorker-45:
Process ForkPoolWorker-48:
Process ForkPoolWorker-53:
Process ForkPoolWorker-47:
Process ForkPoolWorker-57:
Process ForkPoolWorker-55:
Process ForkPoolWorker-49:
Process ForkPoolWorker-51:
Traceback (most recent call last):
File "FinalProjectV1.py", line 354, in <module>
main()
File "FinalProjectV1.py", line 332, in main
KRAS_pred_test=kras()
File "FinalProjectV1.py", line 309, in kras
kras_grid_result = kras_grid.fit(X_train_all_genes.values, Y_train_all_genes['KRAS_mutant'].values,callbacks=[KRAS_history])
File "/soe/ianastop/lib/python3.6/site- packages/sklearn/model_selection/_search.py", line 639, in fit
cv.split(X, y, groups)))
File "/soe/ianastop/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 789, in __call__
self.retrieve()
File "/soe/ianastop/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 699, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/soe/ianastop/venv/lib/python3.6/multiprocessing/pool.py", line 638, in get
self.wait(timeout)
File "/soe/ianastop/venv/lib/python3.6/multiprocessing/pool.py", line 635, in wait
self._event.wait(timeout)
File "/soe/ianastop/venv/lib/python3.6/threading.py", line 551, in wait
signaled = self._cond.wait(timeout)
File "/soe/ianastop/venv/lib/python3.6/threading.py", line 295, in wait
waiter.acquire()
KeyboardInterrupt
It looks like it has something to do with the multiprocess library?