Tensorflow C++ - DNN
Hey there, I am implementing a multilayer perceptron with the AddSymbolicGradients, for the or and x-or it works with multiclass but for mnist data it doesnt train the loss for the same image each epoch has the loss for the output with shape 10 Loss: 0.875 0.84375 0.9375 0.90625 0.96875 0.9375 0.875 0.875 0.875 0.90625 Loss: 0.96875 0.8125 0.90625 0.875 0.90625 0.875 0.90625 0.96875 0.9375 0.84375 Loss: 0.9375 0.8125 0.90625 0.875 0.9375 0.96875 0.875 0.8125 0.96875 0.90625 Loss: 0.875 0.84375 0.9375 0.90625 0.96875 0.9375 0.875 0.875 0.875 0.90625 Loss: 0.96875 0.8125 0.90625 0.875 0.90625 0.875 0.90625 0.96875 0.9375 0.84375 Loss: 0.9375 0.8125 0.90625 0.875 0.9375 0.96875 0.875 0.8125 0.96875 0.90625 ...
My code looks
void Model::train(Tensor imageTensor, Tensor labelTensor, int maxEpochs, float learningRate, int batchSize) {
if (imageTensor.dim_size(0) != labelTensor.dim_size(0)) {
std::cerr << "Image und label dataset size must fit together";
std::exit(EXIT_FAILURE);
}
Tensor imageBatches, labelBatches;
std::tie(imageBatches, labelBatches) = getBatches(batchSize, imageTensor, labelTensor);
Scope lossScope = scope.NewSubScope("Training");
auto loss = Mean(lossScope.WithOpName("Loss"), SquaredDifference(lossScope.WithOpName("Sigmoid-Cross-Entropy"), model, *this->labels), {0});
std::cout << "Image batches size: " << imageBatches.shape() << std::endl;
std::vector<Output> apply_gradients = this->backpropagation(lossScope,learningRate, loss);
std::cout << "Training started" << std::endl;
int dataSize = imageBatches.dim_size(0);
std::vector<Tensor> outputs;
for (int i = 1; i <= maxEpochs; i++) {
auto lossValue = 0;
for (int64_t num = 0; num < dataSize; num++) {
vector<Tensor> output1;
//auto d1 = DeepCopy(scope, imageBatches.SubSlice(num));
//auto d2 = DeepCopy(scope, labelBatches.SubSlice(num));
//TF_CHECK_OK(session->Run({d1, d2}, &output1));
Tensor imageBatch(imageBatches.SubSlice(num));
Tensor labelBatch(labelBatches.SubSlice(num));
TF_CHECK_OK(session->Run({{*features, imageBatch}, {*this->labels, labelBatch}}, apply_gradients, {}, nullptr));
if (num % 1000 == 0) {
//TF_CHECK_OK(session->Run({{*features, inputFeatures[num]}, {*this->labels, labels[num]}}, {loss}, &outputs));
TF_CHECK_OK(session->Run({{*features, imageBatch}, {*this->labels, labelBatch}}, {loss}, &outputs));
std::cout << "Loss: " << outputs[0].flat<float>() << std::endl;
}
}
if (i % 100 == 0) {
std::cout << "Epoch " << i << " Loss: " << lossValue << std::endl;
std::cout << " " << std::endl;
}
}
printWeightForNumber(0);
}
std::vector<Output> Model::backpropagation(Scope lossScope, float learningRate, Output loss) {
std::vector<std::shared_ptr<Variable>> weights = getAllLayerWeights();
std::vector<Output> gradients;
TF_CHECK_OK(AddSymbolicGradients(scope.WithOpName("Gradients"), {loss}, {*weights[0]}, &gradients));
std::vector<Output> apply_gradients;
for (int i = 0; i < weights.size(); i++) {
Output apply_gradient = ApplyGradientDescent(lossScope.WithOpName("Apply-Gradients-" + std::to_string(i)), *weights[i], Cast(scope, learningRate, DT_FLOAT), gradients[i]);
apply_gradients.push_back(apply_gradient);
}
return apply_gradients;
}
I would appreciate it if you could help me as soon as possible.
This question is done by using Softmax. But when using hidden layer for training accuracy is very bad with 10%, why maybe because of overfitting, is there a method to use dropout to prevent overfitting?