No, as of now there is no vanilla cross-entropy layer in Caffe. Caffe only ships special cases of cross-entropy, for example SigmoidCrossEntropyLossLayer and MultinomialLogisticLossLayer.
However, you can get vanilla cross-entropy by slightly modifying MultinomialLogisticLossLayer. That layer computes cross-entropy under the assumption that the target distribution is a one-hot vector; if you generalize its formula to an arbitrary target probability distribution, you get vanilla cross-entropy. Hope that helps.
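To make the change concrete, here is the difference in formulas (the notation is mine, not Caffe's: $p_{ij}$ is the predicted probability of class $j$ for sample $i$, $y_i$ the integer label, $t_{ij}$ the target distribution, $N$ the batch size, $D$ the number of classes). MultinomialLogisticLossLayer computes

$$L = -\frac{1}{N} \sum_{i=1}^{N} \log p_{i, y_i}$$

while vanilla cross-entropy with a general target distribution is

$$L = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{D} t_{ij} \log p_{ij}$$

The first is just the second with $t_{ij}$ set to the one-hot vector for label $y_i$.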
Original MultinomialLogisticLossLayer::Forward_cpu in Caffe:
template <typename Dtype>
void MultinomialLogisticLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // bottom[0]: predicted probabilities (num x dim), bottom[1]: integer labels.
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  int num = bottom[0]->num();
  int dim = bottom[0]->count() / bottom[0]->num();
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    int label = static_cast<int>(bottom_label[i]);
    // Clamp to avoid log(0); only the labeled class contributes (one-hot).
    Dtype prob = std::max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD));
    loss -= log(prob);
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}
Assuming bottom[1] now holds the full target/true distribution instead of integer labels, the Forward_cpu of a CrossEntropyLossLayer would look like this:
template <typename Dtype>
void CrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // bottom[0]: predicted probabilities, bottom[1]: full target distribution.
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  int num = bottom[0]->num();
  int dim = bottom[0]->count() / bottom[0]->num();
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    // Sum over all classes instead of reading off a single labeled entry.
    for (int j = 0; j < dim; ++j) {
      Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
      loss -= bottom_label[i * dim + j] * log(prob);
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}
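For the backward pass, differentiating the loss above with respect to each prediction gives (same notation as before)

$$\frac{\partial L}{\partial p_{ij}} = -\frac{t_{ij}}{N \, p_{ij}}$$

which then just gets multiplied by the diff propagated down from the top blob.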
Similarly, the corresponding Backward_cpu function for backpropagation will be:
template <typename Dtype>
void CrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    int num = bottom[0]->num();
    int dim = bottom[0]->count() / bottom[0]->num();
    caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
    // Fold the top diff and the 1/num averaging into one scale factor.
    const Dtype scale = -top[0]->cpu_diff()[0] / num;
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < dim; ++j) {
        Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
        // dL/dp_ij = -t_ij / (num * p_ij), times the incoming top diff.
        bottom_diff[i * dim + j] = scale * bottom_label[i * dim + j] / prob;
      }
    }
  }
}
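If you want to sanity-check the forward formula outside of Caffe, here is a minimal standalone sketch in plain C++ (no Blob machinery; the hard-coded clamp is a stand-in for kLOG_THRESHOLD, and all names here are mine, not Caffe's):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone version of the Forward_cpu loop above, for verification only.
float cross_entropy(const std::vector<float>& prob,
                    const std::vector<float>& target,
                    int num, int dim) {
  const float kClamp = 1e-20f;  // stand-in for Caffe's kLOG_THRESHOLD
  float loss = 0.0f;
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < dim; ++j) {
      float p = std::max(prob[i * dim + j], kClamp);
      loss -= target[i * dim + j] * std::log(p);
    }
  }
  return loss / num;
}

int main() {
  // One sample, three classes; a one-hot target should reduce to -log(p[label]),
  // i.e. exactly what MultinomialLogisticLossLayer computes.
  std::vector<float> prob = {0.7f, 0.2f, 0.1f};
  std::vector<float> one_hot = {1.0f, 0.0f, 0.0f};
  std::vector<float> soft = {0.5f, 0.3f, 0.2f};
  std::printf("one-hot: %f (expect -log(0.7) ~ 0.3567)\n",
              cross_entropy(prob, one_hot, 1, 3));
  std::printf("soft   : %f\n", cross_entropy(prob, soft, 1, 3));
  return 0;
}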