Incorrect results for certain models (Batch Normalization Issue)
There is a bug in the reading of Batch Norm weights that causes some models to produce incorrect results. Example models affected by this bug: https://github.com/cvjena/cnn-models/tree/master/ResNet_preact
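If you want to check whether a particular .caffemodel is affected before patching anything, one way is to parse the model with Caffe's protobuf definitions and count the blobs stored for each BatchNorm layer: five blobs (with a scalar fifth blob) is the legacy DIGITS layout that triggers the bug, while three blobs is the standard layout. Below is a minimal sketch of such a check; it is only an illustration, the file name check_bn_format.cpp is hypothetical (not part of this repo), and it assumes you can compile and link against your Caffe build (headers, libcaffe, protobuf, glog).

// check_bn_format.cpp -- hypothetical helper, not part of this repo.
// Parses a .caffemodel and reports how many blobs each BatchNorm layer stores.
// 5 blobs usually indicates the legacy DIGITS layout, 3 blobs the standard layout.
#include <iostream>
#include <string>

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/upgrade_proto.hpp"

int main(int argc, char** argv) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <model.caffemodel>" << std::endl;
    return 1;
  }
  caffe::NetParameter net;
  // Caffe's own reader; it aborts with a fatal log message if parsing fails.
  caffe::ReadNetParamsFromBinaryFileOrDie(std::string(argv[1]), &net);
  for (int i = 0; i < net.layer_size(); ++i) {
    const caffe::LayerParameter& layer = net.layer(i);
    if (layer.type() == "BatchNorm") {
      std::cout << layer.name() << ": " << layer.blobs_size()
                << " blobs" << std::endl;
    }
  }
  return 0;
}

Compile it against your Caffe headers and libraries, then run it with the path to the model file; layers reported with 5 blobs are the ones the fix below reorders.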
Solution: This issue has been fixed in NVIDIA/caffe, starting at these lines: https://github.com/NVIDIA/caffe/blob/caffe-0.17/src/caffe/net.cpp#L1161. The fix is reproduced below. In this repo, the file src/caffe/net.cpp contains a function called CopyTrainedLayersFrom(); replace it with the code below and rebuild Caffe.
void Net::CopyTrainedLayersFrom(const NetParameter& param) {
  int num_source_layers = param.layer_size();
  for (int i = 0; i < num_source_layers; ++i) {
    const LayerParameter& source_layer = param.layer(i);
    const string& source_layer_name = source_layer.name();
    const string& source_layer_type = source_layer.type();
    const bool ignore_shape_mismatch = ((solver_ == NULL) ||
        solver_->param().ignore_shape_mismatch());
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
        layer_names_[target_layer_id] != source_layer_name) {
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {
      LOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob> >& target_blobs =
        layers_[target_layer_id]->blobs();
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;
    LOG(INFO) << "Copying source layer " << source_layer_name << " Type:"
        << source_layer_type << " #blobs=" << source_layer.blobs_size();
    // check if BN is in legacy DIGITS format
    if (source_layer_type == "BatchNorm") {
      for (int j = 0; j < target_blobs.size(); ++j) {
        const bool kReshape = true;
        target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
      }
      if (source_layer.blobs_size() == 5 && target_blobs[4]->count() == 1) {
        // old format: 0 - scale, 1 - bias, 2 - mean, 3 - var, 4 - reserved
        // new format: 0 - mean, 1 - var, 2 - reserved, 3 - scale, 4 - bias
        LOG(INFO) << "BN legacy DIGITS format detected ... ";
        std::swap(target_blobs[0], target_blobs[2]);
        std::swap(target_blobs[1], target_blobs[3]);
        // ==> 0 - mean, 1 - var, 2 - scale, 3 - bias, 4 - reserved
        std::swap(target_blobs[2], target_blobs[4]);
        std::swap(target_blobs[3], target_blobs[4]);
        LOG(INFO) << "BN Transforming to new format completed.";
      }
      if (source_layer.blobs_size() == 3) {
        const float scale_factor = target_blobs[2]->cpu_data