Tensorflow C++ - DNN

Open sguelbol opened this issue 1 year ago • 1 comments

Hey there, I am implementing a multilayer perceptron with the AddSymbolicGradients, for the or and x-or it works with multiclass but for mnist data it doesnt train the loss for the same image each epoch has the loss for the output with shape 10 Loss: 0.875 0.84375 0.9375 0.90625 0.96875 0.9375 0.875 0.875 0.875 0.90625 Loss: 0.96875 0.8125 0.90625 0.875 0.90625 0.875 0.90625 0.96875 0.9375 0.84375 Loss: 0.9375 0.8125 0.90625 0.875 0.9375 0.96875 0.875 0.8125 0.96875 0.90625 Loss: 0.875 0.84375 0.9375 0.90625 0.96875 0.9375 0.875 0.875 0.875 0.90625 Loss: 0.96875 0.8125 0.90625 0.875 0.90625 0.875 0.90625 0.96875 0.9375 0.84375 Loss: 0.9375 0.8125 0.90625 0.875 0.9375 0.96875 0.875 0.8125 0.96875 0.90625 ...

My code looks

void Model::train(Tensor imageTensor, Tensor labelTensor, int maxEpochs, float learningRate, int batchSize) {
    if (imageTensor.dim_size(0) != labelTensor.dim_size(0)) {
        std::cerr << "Image und label dataset size must fit together";
        std::exit(EXIT_FAILURE);
    }
    Tensor imageBatches, labelBatches;
    std::tie(imageBatches, labelBatches) = getBatches(batchSize, imageTensor, labelTensor);
    Scope lossScope = scope.NewSubScope("Training");
    auto loss = Mean(lossScope.WithOpName("Loss"), SquaredDifference(lossScope.WithOpName("Sigmoid-Cross-Entropy"), model, *this->labels), {0});
    std::cout << "Image batches size: " << imageBatches.shape() << std::endl;
    std::vector<Output> apply_gradients = this->backpropagation(lossScope,learningRate, loss);

    std::cout << "Training started" << std::endl;
    int dataSize = imageBatches.dim_size(0);
    std::vector<Tensor> outputs;
    for (int i = 1; i <= maxEpochs; i++) {
        auto lossValue = 0;
        for (int64_t num = 0; num < dataSize; num++) {
            vector<Tensor> output1;
            //auto d1 = DeepCopy(scope, imageBatches.SubSlice(num));
            //auto d2 = DeepCopy(scope, labelBatches.SubSlice(num));
            //TF_CHECK_OK(session->Run({d1, d2}, &output1));
            Tensor imageBatch(imageBatches.SubSlice(num));
            Tensor labelBatch(labelBatches.SubSlice(num));
            TF_CHECK_OK(session->Run({{*features, imageBatch}, {*this->labels, labelBatch}}, apply_gradients, {}, nullptr));
            if (num % 1000 == 0) {
                //TF_CHECK_OK(session->Run({{*features, inputFeatures[num]}, {*this->labels, labels[num]}}, {loss}, &outputs));
                TF_CHECK_OK(session->Run({{*features, imageBatch}, {*this->labels, labelBatch}}, {loss}, &outputs));
                std::cout << "Loss: " << outputs[0].flat<float>() << std::endl;
            }
        }
        if (i % 100 == 0) {
            std::cout << "Epoch " << i << " Loss: " << lossValue << std::endl;
            std::cout << " " << std::endl;
        }
    }
    printWeightForNumber(0);
}

std::vector<Output> Model::backpropagation(Scope lossScope, float learningRate, Output loss) {
    std::vector<std::shared_ptr<Variable>> weights = getAllLayerWeights();
    std::vector<Output> gradients;
    TF_CHECK_OK(AddSymbolicGradients(scope.WithOpName("Gradients"), {loss}, {*weights[0]}, &gradients));

    std::vector<Output> apply_gradients;
    for (int i = 0; i < weights.size(); i++) {
        Output apply_gradient = ApplyGradientDescent(lossScope.WithOpName("Apply-Gradients-" + std::to_string(i)), *weights[i], Cast(scope, learningRate,  DT_FLOAT), gradients[i]);
        apply_gradients.push_back(apply_gradient);
    }
    return apply_gradients;
}

I would appreciate it if you could help me as soon as possible.

Apr 21 '24 21:04 sguelbol

This question is done by using Softmax. But when using hidden layer for training accuracy is very bad with 10%, why maybe because of overfitting, is there a method to use dropout to prevent overfitting?

May 03 '24 17:05 sguelbol