I'm trying to solve a time-series regression problem with pytorch
and using optuna
to do the hyperparameter optimization. I have tried to adapt this example in the optuna docs which aims to do hand-written digit recognition.
I've customized the setup
function in the FashionMNISTDataModule
class so that it accepts my pandas dataframe:
from sklearn.model_selection import train_test_split
...
class FashionMNISTDataModule(pl.LightningDataModule):
...
def setup(self, stage: Optional[str] = None) -> None:
#self.mnist_test = datasets.FashionMNIST(
# self.data_dir, train=False, download=0, transform=transforms.ToTensor()
#)
#mnist_full = datasets.FashionMNIST(
# self.data_dir, train=True, download=0, transform=transforms.ToTensor()
#)
# inputs
X = df[['x1','x2','x3','x4']]
# output
y = df[['y']]
# separate into training/testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=500)
# convert to tensors
X_train = torch.from_numpy(X_train.to_numpy()).float()
y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).float())
X_test = torch.from_numpy(X_test.to_numpy()).float()
y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).float())
self.mnist_test = X_test
mnist_full = y_test
self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
...
My example data is:
>> df
y x1 x2 x3 x4
Date
2018-03-05 73.68750 2204.0 108.6875 5.964844 2018.0
2018-03-12 65.06250 2244.0 106.0000 11.164062 2102.0
2018-03-19 61.28125 2240.0 106.8750 8.304688 2130.0
2018-03-26 57.87500 2256.0 107.5625 16.750000 2154.0
2019-03-04 173.37500 1826.0 113.8125 16.328125 2130.0
2019-03-11 199.75000 1789.0 110.3750 6.386719 2038.0
2019-03-18 206.25000 1809.0 109.6250 4.468750 1958.0
2019-03-25 186.50000 1780.0 111.1875 17.375000 1949.0
2020-03-02 63.81250 2586.0 113.2500 8.281250 2108.0
2020-03-09 52.75000 2514.0 111.6875 12.937500 2088.0
2020-03-16 72.12500 2468.0 109.7500 15.960938 2058.0
2020-03-23 75.87500 2394.0 111.0000 18.890625 2023.0
2020-03-30 51.71875 2298.0 95.1250 10.843750 2122.0
If I run the code with my new function (keeping everything else nearly the same), I get an error:
usage: ipykernel_launcher.py [-h] [--pruning]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/<username>/.local/share/jupyter/runtime/kernel-027f206b-6952-4ff2-bdbe-c947aad00191.json
An exception has occurred, use %tb to see the full traceback.
SystemExit: 2
It might be because I'm trying to pass a tensor into a function that doesn't accept tensors, but I'm not sure what to change.
If I run the code via .py file instead of on jupyterlab, I get the error message
ValueError('Sum of input lengths does not equal the length of the input dataset!')
Traceback (most recent call last):
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\_optimize.py", line 216, in _run_trial
value_or_values = func(trial)
File "C:\Users\<username>\Downloads\mymodel_v12beta.py", line 169, in objective
trainer.fit(model, datamodule=datamodule)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 458, in fit
self._run(model)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 713, in _run
self.call_setup_hook(model) # allow user to setup lightning_module in accelerator environment
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1159, in call_setup_hook
self.datamodule.setup(stage=fn)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\core\datamodule.py", line 384, in wrapped_fn
return fn(*args, **kwargs)
File "C:\Users\<username>\Downloads\mymodel_v12beta.py", line 129, in setup
self.mnist_train, self.mnist_val = random_split(mnist_full, [10, 3])
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataset.py", line 332, in random_split
raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
ValueError: Sum of input lengths does not equal the length of the input dataset! [0m
Traceback (most recent call last):
File "C:\Users\<username>\Downloads\mymodel_v12beta.py", line 190, in <module>
study.optimize(objective, n_trials=100, timeout=600)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\study.py", line 401, in optimize
_optimize(
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\_optimize.py", line 65, in _optimize
_optimize_sequential(
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\_optimize.py", line 162, in _optimize_sequential
trial = _run_trial(study, func, catch)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\_optimize.py", line 267, in _run_trial
raise func_err
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\optuna\_optimize.py", line 216, in _run_trial
value_or_values = func(trial)
File "C:\Users\<username>\Downloads\mymodel_v12beta.py", line 169, in objective
trainer.fit(model, datamodule=datamodule)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 458, in fit
self._run(model)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 713, in _run
self.call_setup_hook(model) # allow user to setup lightning_module in accelerator environment
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1159, in call_setup_hook
self.datamodule.setup(stage=fn)
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\pytorch_lightning\core\datamodule.py", line 384, in wrapped_fn
return fn(*args, **kwargs)
File "C:\Users\<username>\Downloads\mymodel_v12beta.py", line 129, in setup
self.mnist_train, self.mnist_val = random_split(mnist_full, [10, 3])
File "C:\Users\<username>\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataset.py", line 332, in random_split
raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
ValueError: Sum of input lengths does not equal the length of the input dataset!