I'm trying to build an MLP model using pylearn2, but the documentation isn't very explicit about how to import external data (all the tutorials use the MNIST data).
So, my data are CSV files:
0 0.129 -0.234 0.394 ...
0 0.293 -0.394 0.234 ...
1 0.192 -0.842 0.123 ...
...
So it's a matrix: the first column is the value to predict and the other columns are the attributes. I don't even know whether this is the right input format.
Here is my YAML file:
# pylearn2 training configuration: a one-hidden-layer MLP (Sigmoid -> Softmax)
# trained with batch gradient descent on data loaded from CSV files.
!obj:pylearn2.train.Train {
    # Training set. Anchored as &train so the same object can be reused as a
    # monitoring dataset further down.
    dataset: &train !obj:pylearn2.datasets.csv_dataset.CSVDataset {
        path: 'train.csv',
        # NOTE(review): the output layer below is a 2-class Softmax and the
        # monitored channel is a misclassification rate, which suggests a
        # classification task -- confirm that 'regression' is really intended.
        task: 'regression',
        start: 0,
        stop: 53607,
        # The CSV files have no header row.
        # NOTE(review): no delimiter is given here, so the loader's default is
        # handed to np.loadtxt; if the files are whitespace-separated, verify
        # that the default matches.
        expect_headers: False,
        num_outputs: 1
    },
    model: !obj:pylearn2.models.mlp.MLP {
        layers : [
            # Hidden layer: 10 sigmoid units.
            !obj:pylearn2.models.mlp.Sigmoid {
                layer_name: 'h0',
                dim: 10,
                irange: .05
            },
            # Output layer: softmax over the two classes.
            !obj:pylearn2.models.mlp.Softmax {
                layer_name: 'y',
                n_classes: 2,
                irange: 0.
            },
        ],
        # Number of input features (attribute columns) fed to the network.
        nvis: 220,
    },
    algorithm: !obj:pylearn2.training_algorithms.bgd.BGD {
        batch_size: 1024,
        conjugate: 1,
        updates_per_batch: 10,
        # Datasets on which monitoring channels are computed. The keys
        # ('train', 'valid') appear as the prefix of the channel names used
        # below.
        monitoring_dataset:
            {
                'train' : *train,
                # NOTE(review): unlike the training set, num_outputs is not set
                # here -- presumably the default applies; verify it matches.
                'valid' : !obj:pylearn2.datasets.csv_dataset.CSVDataset {
                    path: 'test.csv',
                    task: 'regression',
                    start: 0,
                    stop: 17868,
                    expect_headers: False,
                }
            },
        # Both criteria are combined with And: one watches the validation
        # misclassification channel, the other caps the number of epochs.
        termination_criterion: !obj:pylearn2.termination_criteria.And {
            criteria: [
                !obj:pylearn2.termination_criteria.MonitorBased {
                    channel_name: "valid_y_misclass"
                },
                !obj:pylearn2.termination_criteria.EpochCounter {
                    max_epochs: 10000
                }
            ]
        }
    },
    extensions: [
        # Save the model to best.pkl based on the monitored channel.
        # The channel name must be spelled exactly like the one used by the
        # termination criterion above ('misclass', not 'missclass').
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
            channel_name: 'valid_y_misclass',
            save_path: "best.pkl"
        },
    ],
}
There are two classes, but, as I understand it, the task has to be set to 'regression' when the data is a matrix rather than a vector.
Here is the error output when I run "train.py training.yaml":
Traceback (most recent call last):
File "train.py", line 261, in <module>
args.verbose_logging, args.debug)
File "train.py", line 206, in train
train_obj = serial.load_train_file(config)
File "/home/romain/Projet/long/pylearn2/pylearn2/utils/serial.py", line 430, in load_train_file
return yaml_parse.load_path(config_file_path, environ=environ)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 255, in load_path
return load(content, instantiate=instantiate, environ=environ, **kwargs)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 213, in load
return _instantiate(proxy_graph)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 156, in _instantiate
return _instantiate_proxy_tuple(proxy, bindings)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 121, in _instantiate_proxy_tuple
for k, v in six.iteritems(proxy.keywords))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 121, in <genexpr>
for k, v in six.iteritems(proxy.keywords))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 156, in _instantiate
return _instantiate_proxy_tuple(proxy, bindings)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 121, in _instantiate_proxy_tuple
for k, v in six.iteritems(proxy.keywords))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 121, in <genexpr>
for k, v in six.iteritems(proxy.keywords))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 161, in _instantiate
for k, v in six.iteritems(proxy))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 161, in <genexpr>
for k, v in six.iteritems(proxy))
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 156, in _instantiate
return _instantiate_proxy_tuple(proxy, bindings)
File "/home/romain/Projet/long/pylearn2/pylearn2/config/yaml_parse.py", line 122, in _instantiate_proxy_tuple
obj = checked_call(proxy.callable, kwargs)
File "/home/romain/Projet/long/pylearn2/pylearn2/utils/call_check.py", line 99, in checked_call
return to_call(**kwargs)
File "/home/romain/Projet/long/pylearn2/pylearn2/datasets/csv_dataset.py", line 123, in __init__
X, y = self._load_data()
File "/home/romain/Projet/long/pylearn2/pylearn2/datasets/csv_dataset.py", line 149, in _load_data
data = np.loadtxt(self.path, delimiter=self.delimiter)
File "/usr/lib/python2.7/dist-packages/numpy/lib/npyio.py", line 856, in loadtxt
X = np.array(X, dtype)
I copied train.py from "pylearn2/pylearn2/scripts/train.py", and the CSV files are in the current folder.
Is my YAML file wrong? I don't really get the meaning of the error.