We start with code to validate that our gradient estimates are correct.
The first check is that the derivative of the PageRank vector with respect to alpha matches a finite-difference estimate.
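For reference, here is the derivation behind the gpr formula in the code below. The PageRank vector x(alpha) solves (I - alpha*P)*x(alpha) = (1-alpha)*v with v = ones(n)/n. Differentiating both sides with respect to alpha gives

    -P*x(alpha) + (I - alpha*P)*x'(alpha) = -v,  so  (I - alpha*P)*x'(alpha) = P*x(alpha) - v,

which is exactly the linear system that gpr solves; fdest approximates the same derivative with a forward difference.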
using SparseArrays
using LinearAlgebra
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
display(P)
pr(alpha) = (I - alpha*P) \ ((1-alpha)*ones(size(P,1))/size(P,1))      # PageRank vector x(alpha)
gpr(alpha) = (I - alpha*P) \ (P*pr(alpha) - ones(size(P,1))/size(P,1)) # derivative x'(alpha)
fdest = (pr(0.75 + 0.00001) .- pr(0.75)) / 0.00001 # compute a finite diff. est
[fdest gpr(0.75)]
5×5 adjoint(::Matrix{Float64}) with eltype Float64:
 0.0  0.0  0.0  0.0  1.0
 0.5  0.0  0.0  0.0  0.0
 0.5  1.0  0.0  0.0  0.0
 0.0  0.0  1.0  0.0  0.0
 0.0  0.0  0.0  1.0  0.0
5×2 Matrix{Float64}:
  0.0393131   0.0393128
 -0.0797881  -0.0797884
 -0.010528   -0.0105278
  0.0180323   0.0180326
  0.0329707   0.0329708
using LinearAlgebra
"""
gradient_check(f, x, g)
Check that g seems to express the gradient of f by comparing
values computed with finite differences. This is based on the
gradientcheck function in the Poblano Matlab toolbox.
I've added a relative check.
"""
function gradient_check(f, x, g)
fx = f(x) # computed function
gx = g(x) # putative gradient
h = sqrt(eps(fx))
xi = copy(x)
gxd = copy(gx)
for i=1:length(x)
xi[i] += h
gxd[i] = (f(xi) - fx)/h
xi[i] = x[i] # reset
end
absdiff = abs.(gxd .- gx)
relabsdiff = abs.(gxd .- gx) ./ max.(abs.(gx), 1.0)
return (g=gx, gfd=gxd,
maxdiff=maximum(absdiff),
maxreldiff = maximum(relabsdiff),
normdiff=norm(gxd - gx))
end
gradient_check(x -> x'*x, randn(10), x -> 2*x).maxdiff
5.4527566462070354e-8
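As an extra sanity check on the checker itself (not part of the original runs), a deliberately wrong gradient should produce a maxdiff far from zero:
# hypothetical example: 3*x is not the gradient of x'*x, so expect a maxdiff of order 1 instead of ~1e-8
gradient_check(x -> x'*x, randn(10), x -> 3*x).maxdiff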
# Let's test the example from class
# prod(x)^3
gradient_check(x -> prod(x)^3, randn(10),
x -> 3*prod(x)^2*prod(x)./x).maxdiff
4.2474080320799395e-12
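The gradient used above follows from the chain rule together with d/dx_i prod(x) = prod(x)/x_i (valid when x_i != 0):

    d/dx_i ( prod(x) )^3 = 3*prod(x)^2 * prod(x)/x_i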
# Let's test another example from class
# sum(log.(x))
gradient_check(x -> sum(log.(x)), rand(10),
x -> 1.0 ./x).maxdiff
9.191653133941188e-8
# Now try a log-det example
# log(det(X))
gradient_check(X -> log(det(X)), rand(10,10), X->inv(X)).maxreldiff
2.666572894810656
## Let's see what's wrong...
"""
finite_difference_gradient(f, x)
"""
function finite_difference_gradient(f, x; h=nothing)
fx = f(x) # computed function
if h === nothing
h = sqrt(eps(fx))
end
xi = copy(x)
gxd = copy(x)
for i=1:length(x)
xi[i] += h
gxd[i] = (f(xi) - fx)/h
xi[i] = x[i] # reset
end
return gxd
end
finite_difference_gradient
using StableRNGs
X1 = rand(StableRNG(2), 3,3)
finite_difference_gradient(X->log(det(X)), X1)
3×3 Matrix{Float64}:
 -0.338137   2.67729   -1.16792
 -2.15245    1.93447    0.496364
  3.04566   -5.42069    2.03733
inv(X1)
3×3 Matrix{Float64}:
 -0.338137  -2.15245    3.04566
  2.67729    1.93447   -5.42069
 -1.16792    0.496364   2.03733
# Is this transposed??????
gradient_check(X -> log(det(X)), rand(10,10), X->inv(X)').maxreldiff
DomainError with -0.026552566720242473:
log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)).

Stacktrace:
 [1] throw_complex_domainerror(f::Symbol, x::Float64)
   @ Base.Math ./math.jl:33
 [2] _log
   @ ./special/log.jl:295 [inlined]
 [3] log(x::Float64)
   @ Base.Math ./special/log.jl:261
 [4] #304
   @ ./In[83]:2 [inlined]
 [5] gradient_check(f::var"#304#306", x::Matrix{Float64}, g::var"#305#307")
   @ Main ./In[13]:11
 [6] top-level scope
   @ In[83]:2
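The DomainError is a separate issue from the transpose: for this draw, det(rand(10,10)) is negative, so log(det(X)) is not defined over the reals. The finite-difference evidence above indicates the gradient of log(det(X)) is indeed inv(X)' (that is, X^{-T}), and the same formula is the gradient of log(abs(det(X))). A hedged way to redo the transposed check without the domain problem is LinearAlgebra's logabsdet, which returns (log(abs(det(X))), sign(det(X))):
# sketch, not part of the original run: check log|det(X)|, whose gradient is also inv(X)'
gradient_check(X -> first(logabsdet(X)), rand(10,10), X -> inv(X)').maxreldiff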
# Let's test the example from class
# sum(exp(-(x).^2/sigmas))
sigmas = rand(10)
gradient_check(x -> sum(exp.((-(x).^2.0)./sigmas)), randn(10),
x -> (-2.0*x./sigmas).*exp.((-(x).^2.0)./sigmas)).maxdiff
9.668389910899577e-8
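The gradient used above follows term by term, since only the i-th term of the sum depends on x_i:

    d/dx_i sum_j exp(-x_j^2/sigma_j) = (-2*x_i/sigma_i) * exp(-x_i^2/sigma_i)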
## The vec operation!
A = randn(5,4)
display(A)
a = vec(A)
5×4 Matrix{Float64}:
  1.86236   -0.65579     0.683274  -1.55886
  0.73896    0.146993    0.542514  -0.454353
 -1.0584     0.0780594  -0.459759  -1.0819
  0.271845   0.739824    0.1        0.18713
 -1.03668    0.870961   -0.0253258  1.0562
20-element Vector{Float64}:
  1.8623561533513582
  0.7389599424959767
 -1.0583953069538454
  0.27184515729029757
 -1.0366843195874602
 -0.655790456444445
  0.14699285286682423
  0.07805944615915486
  0.739823575176113
  0.8709609935784469
  0.6832743550603724
  0.5425138397470901
 -0.4597594714688205
  0.10000039096208468
 -0.025325767326301356
 -1.5588642753853783
 -0.45435304621347966
 -1.0819015405001153
  0.18713023415677174
  1.0561950400877163
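Two small checks on what vec does (not in the original notebook): it stacks the columns of A in order, so entry A[i,j] lands at position i + (j-1)*size(A,1) of a, and reshape undoes it:
a[2 + (3-1)*size(A,1)] == A[2,3]   # column-major stacking, should be true
reshape(a, size(A)) == A           # vec is undone by reshape, should be true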
## Try a least-squares objective.
A = randn(20,10)
b = randn(20)
gradient_check(x -> norm(A*x - b)^2, randn(10), x -> 2*A'*A*x - 2*A'*b).maxdiff
6.195323720703527e-6
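The gradient used here comes from expanding the squared norm:

    norm(A*x - b)^2 = x'*A'*A*x - 2*b'*A*x + b'*b,  so  grad = 2*A'*A*x - 2*A'*b.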
## Try a log-det example. We need a matrix with a positive determinant for this;
## it's easiest to use a symmetric positive definite matrix.
A = randn(20,20)
A = A'*A
gradient_check(A -> log(det(A)), A, A -> inv(A)).maxdiff
6.0620823887802544e-5
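One reason inv(A) works here without a transpose (an observation, not from the original notebook): A = A'*A is symmetric, so inv(A) is symmetric as well and transposing it changes nothing up to roundoff.
inv(A) ≈ inv(A)'   # should hold (up to roundoff) because A is symmetric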
## Try another log-det example, this time with an upper triangular matrix
## instead of a symmetric positive definite one.
A = triu(rand(10,10)) # this matrix has pos. eigenvalues
gradient_check(A -> log(det(A)), A, A -> inv(A)').maxdiff
# Is this off due to something with the small diagonal entries?
0.00014760922896073225
minimum(diag(A))
0.025750457252672887
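A hedged explanation for the larger error here: for an upper triangular A, det(A) is the product of the diagonal entries, so the derivative of log(det(A)) with respect to A[i,i] is 1/A[i,i] and the second derivative along that entry is -1/A[i,i]^2. A forward difference has truncation error of roughly (h/2)*|f''|, so with the smallest diagonal entry about 0.026 and h = sqrt(eps(f(A))) about 4e-8, a rough estimate is

    (h/2) * (1/0.026^2) ≈ (4e-8/2) / 6.8e-4 ≈ 3e-5,

which is much closer to the 1.5e-4 observed above than to the ~1e-8 the well-scaled examples achieved.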