We start with code to validate that our derivative estimates are correct.
The first check is the PageRank example. Differentiating the PageRank system (I - alpha*P)*x(alpha) = (1 - alpha)*v, with v the uniform vector, with respect to alpha gives (I - alpha*P)*x'(alpha) = P*x(alpha) - v; this is the linear system that gpr solves below, and we compare it against a finite-difference estimate.
using SparseArrays
using LinearAlgebra
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
display(P)
pr(alpha) = (I - alpha*P) \ ((1 - alpha)*ones(size(P,1))/size(P,1)) # PageRank vector
gpr(alpha) = (I - alpha*P) \ (P*pr(alpha) - ones(size(P,1))/size(P,1)) # derivative of pr w.r.t. alpha
fdest = (pr(0.75 + 1e-5) .- pr(0.75)) / 1e-5 # forward finite-difference estimate
[fdest gpr(0.75)]
5×5 adjoint(::Matrix{Float64}) with eltype Float64:
 0.0  0.0  0.0  0.0  1.0
 0.5  0.0  0.0  0.0  0.0
 0.5  1.0  0.0  0.0  0.0
 0.0  0.0  1.0  0.0  0.0
 0.0  0.0  0.0  1.0  0.0
5×2 Matrix{Float64}:
  0.0393131   0.0393128
 -0.0797881  -0.0797884
 -0.010528   -0.0105278
  0.0180323   0.0180326
  0.0329707   0.0329708
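The two columns agree to about five digits, which matches the O(h) error of a forward difference with h = 1e-5. As a quick sketch beyond the original check, a central difference should tighten the agreement to roughly O(h^2):
h = 1e-5
fdest2 = (pr(0.75 + h) .- pr(0.75 - h)) / (2h) # central difference, O(h^2) error
maximum(abs.(fdest2 .- gpr(0.75))) # should be much smaller than the forward-difference gap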
using LinearAlgebra
"""
gradient_check(f, x, g)
Check that g seems to express the gradient of f by comparing
values computed with finite differences. This is based on the
gradientcheck function in the Poblano Matlab toolbox.
"""
function gradient_check(f, x, g)
fx = f(x) # computed function
gx = g(x) # putative gradient
h = sqrt(eps(fx))
xi = copy(x)
gxd = copy(gx)
for i=1:length(x)
xi[i] += h
gxd[i] = (f(xi) - fx)/h
xi[i] = x[i] # reset
end
absdiff = abs.(gxd .- gx)
return (g=gx, gfd=gxd, maxdiff=maximum(absdiff), normdiff=norm(gxd - gx))
end
gradient_check(x -> x'*x, randn(10), x -> 2*x).maxdiff
3.804766579573737e-8
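To see what a failing check looks like, here is a sketch with a deliberately wrong gradient that drops the factor of 2; maxdiff then comes out order one instead of order sqrt(eps):
gradient_check(x -> x'*x, randn(10), x -> x).maxdiff # wrong gradient, so maxdiff is O(1)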
# Let's test the example from class
# prod(x)^3
gradient_check(x -> prod(x)^3, randn(10),
x -> 3*prod(x)^2*prod(x)./x).maxdiff
1.459985074026957e-13
# Let's test the example from class
# sum(log(x))
gradient_check(x -> sum(log.(x)), rand(10),
x -> 1.0 ./x).maxdiff
5.253822985551437e-7
# Let's test the example from class
# sum(exp(-(x).^2/sigmas))
sigmas = rand(10)
gradient_check(x -> sum(exp.((-(x).^2.0)./sigmas)), randn(10),
x -> (-2.0*x./sigmas).*exp.((-(x).^2.0)./sigmas)).maxdiff
9.668389910899577e-8
## The vec operation!
A = randn(5,4)
display(A)
a = vec(A)
5×4 Matrix{Float64}:
  1.86236   -0.65579    0.683274   -1.55886
  0.73896    0.146993   0.542514   -0.454353
 -1.0584     0.0780594  -0.459759  -1.0819
  0.271845   0.739824    0.1        0.18713
 -1.03668    0.870961   -0.0253258  1.0562
20-element Vector{Float64}:
  1.8623561533513582
  0.7389599424959767
 -1.0583953069538454
  0.27184515729029757
 -1.0366843195874602
 -0.655790456444445
  0.14699285286682423
  0.07805944615915486
  0.739823575176113
  0.8709609935784469
  0.6832743550603724
  0.5425138397470901
 -0.4597594714688205
  0.10000039096208468
 -0.025325767326301356
 -1.5588642753853783
 -0.45435304621347966
 -1.0819015405001153
  0.18713023415677174
  1.0561950400877163
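The first five entries of a are exactly the first column of A, so vec stacks the columns in order (Julia is column-major), and reshape inverts it:
a[1:5] == A[:,1] # vec stacks the columns of A
reshape(a, 5, 4) == A # reshape undoes vec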
## Try a least-squares objective.
A = randn(20,10)
b = randn(20)
gradient_check(x -> norm(A*x - b)^2, randn(10), x -> 2*A'*A*x - 2*A'*b).maxdiff
6.195323720703527e-6
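As one more consistency check that follows directly from the formula, the gradient 2*A'*A*x - 2*A'*b must vanish at the least-squares solution:
xstar = A\b # least-squares solution
norm(2*A'*A*xstar - 2*A'*b) # essentially zero at the minimizer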
## Try a log-det example. We need a positive definite matrix for this;
## it's easiest to make one that is symmetric positive definite.
## (For symmetric A, the gradient inv(A)' equals inv(A).)
A = randn(20,20)
A = A'*A
gradient_check(A -> log(det(A)), A, A -> inv(A)).maxdiff
6.0620823887802544e-5
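A hedged aside: for larger matrices det itself overflows even though log(det) stays moderate, so LinearAlgebra's logdet, which works from a factorization, is the safer way to write this objective.
B = randn(200,200); B = B'*B
logdet(B) # fine, a moderate number
log(det(B)) # Inf, because det(B) overflows Float64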
## Try log-det again with a nonsymmetric matrix. An upper triangular
## matrix with a positive diagonal has positive eigenvalues, so det(A) > 0.
## For nonsymmetric A, the gradient of log(det(A)) is inv(A)'.
A = triu(rand(10,10)) # positive diagonal entries, hence positive eigenvalues
gradient_check(A -> log(det(A)), A, A -> inv(A)').maxdiff
# Is the larger error here due to A being nearly singular? Check the
# smallest diagonal entry, which is also its smallest eigenvalue.
0.00014760922896073225
minimum(diag(A))
0.025750457252672887
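One hedged way to probe that conjecture: a small diagonal entry means A is close to singular, so the entries of inv(A), and the second derivatives of log(det(A)), are large, which amplifies finite-difference error.
cond(A) # large when A is close to singular
norm(inv(A)) # the gradient entries themselves are large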