ReverseDiff.jl

Incorrect zero gradient with no error thrown

Open bantin opened this issue 5 years ago • 0 comments

I am trying to take gradients of a loss function that is defined via the function below. I need gradients of tensor_conv! with respect to W and H, which are not mutated inside the function:

using LinearAlgebra: mul!  # needed for the in-place 5-argument mul! below

function tensor_conv!(est, W::AbstractArray, H::AbstractArray)
    K, N, L = size(W)
    T = size(H, 2)

    # Zero the output buffer, then accumulate one lagged term per slice of W
    @. est = 0
    for lag = 0:(L-1)
        @views s_dot!(est[:, lag+1:T], W[:, :, lag+1]', H, lag, 1, 1)
    end

    return est
end

function s_dot!(B, Wl, H, lag, α, β)
    K, T = size(H)

    # 5-argument mul! computes B = α * (Wl * H_shifted) + β * B in place
    if lag < 0
        # This branch is never reached from tensor_conv!, which only passes lag >= 0
        @views mul!(B, Wl, H[:, 1+lag:T], α, β)
    else  # lag >= 0
        @views mul!(B, Wl, H[:, 1:T-lag], α, β)
    end

    return B
end

However, ReverseDiff always gives me a zero gradient when differentiating this function, and it does not throw an error. Can someone explain what might be going wrong?
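For concreteness, here is a minimal sketch of how the gradient call might look. The sizes, the output buffer est, and the sum-of-squares loss are illustrative stand-ins only; the issue does not show the original call site:

using ReverseDiff

# Illustrative sizes; the exact dimensions don't matter for the symptom
K, N, L, T = 3, 4, 2, 10
W = rand(K, N, L)
H = rand(K, T)

# Wrap the mutating kernel in a scalar loss; est is freshly allocated inside,
# so W and H themselves are never mutated (sum of squares is just an example)
loss(W, H) = sum(abs2, tensor_conv!(zeros(N, size(H, 2)), W, H))

# Symptom from the report above: both gradients come back all zero, no error
gW, gH = ReverseDiff.gradient(loss, (W, H))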

bantin · Mar 08 '21, 15:03