Matrix Calculus

We start with code to validate that the derivatives we computed are correct.

The first check is the PageRank example: the derivative of the PageRank vector with respect to alpha.

In [4]:
using SparseArrays
using LinearAlgebra
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
display(P)
pr(alpha) = (I - alpha*P) \ ((1 - alpha)*ones(size(P,1))/size(P,1)) # PageRank vector x(alpha)
gpr(alpha) = (I - alpha*P) \ (P*pr(alpha) - ones(size(P,1))/size(P,1)) # derivative of pr with respect to alpha
fdest = (pr(0.75 + 0.00001) .- pr(0.75)) / 0.00001 # compute a finite diff. est
[fdest gpr(0.75)]
5×5 adjoint(::Matrix{Float64}) with eltype Float64:
 0.0  0.0  0.0  0.0  1.0
 0.5  0.0  0.0  0.0  0.0
 0.5  1.0  0.0  0.0  0.0
 0.0  0.0  1.0  0.0  0.0
 0.0  0.0  0.0  1.0  0.0
Out[4]:
5×2 Matrix{Float64}:
  0.0393131   0.0393128
 -0.0797881  -0.0797884
 -0.010528   -0.0105278
  0.0180323   0.0180326
  0.0329707   0.0329708
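To see where the formula in gpr comes from, write the PageRank vector as the solution of a linear system and differentiate both sides with respect to alpha (here v = ones(n)/n is the uniform teleportation vector):

$$ (I - \alpha P)\,x(\alpha) = (1-\alpha)v \;\Rightarrow\; -P\,x(\alpha) + (I - \alpha P)\,x'(\alpha) = -v \;\Rightarrow\; x'(\alpha) = (I - \alpha P)^{-1}\big(P\,x(\alpha) - v\big). $$

This is exactly what gpr(alpha) computes, and the two columns above agree to about five digits, which is what we expect from a forward difference with step 1e-5.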
In [13]:
using LinearAlgebra
"""
    gradient_check(f, x, g)

Check that g seems to express the gradient of f by comparing
values computed with finite differences. This is based on the 
gradientcheck function in the Poblano Matlab toolbox.
I've added a relative check. 
"""
function gradient_check(f, x, g)
    fx = f(x) # computed function 
    gx = g(x) # putative gradient
    
    h = sqrt(eps(fx))
    xi = copy(x)
    gxd = copy(gx) 
    for i=1:length(x)
        xi[i] += h
        gxd[i] = (f(xi) - fx)/h
        xi[i] = x[i] # reset
    end
    absdiff = abs.(gxd .- gx)
    relabsdiff = abs.(gxd .- gx) ./ max.(abs.(gx), 1.0)
    
    return (g=gx, gfd=gxd, 
            maxdiff=maximum(absdiff), 
            maxreldiff = maximum(relabsdiff),
            normdiff=norm(gxd - gx))
end
gradient_check(x -> x'*x, randn(10), x -> 2*x).maxdiff
Out[13]:
5.4527566462070354e-8
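The check uses a one-sided (forward) difference: each component of the putative gradient is compared against

$$ g_i \approx \frac{f(x + h e_i) - f(x)}{h}, \qquad h = \sqrt{\mathrm{eps}(f(x))}, $$

so agreement to roughly sqrt(eps), about 1e-8 for Float64, is the best we should expect; that is what the x'*x example (whose gradient is 2x) shows.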
In [22]:
# Let's test the example from class
# prod(x)^3
gradient_check(x -> prod(x)^3, randn(10), 
               x -> 3*prod(x)^2*prod(x)./x).maxdiff
Out[22]:
4.2474080320799395e-12
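The gradient used above is the chain rule combined with the derivative of a product (fine as long as no entry of x is zero, which holds almost surely for randn):

$$ \frac{\partial}{\partial x_i}\Big(\prod_j x_j\Big)^{3} = 3\Big(\prod_j x_j\Big)^{2}\prod_{j\ne i} x_j = 3\Big(\prod_j x_j\Big)^{2}\,\frac{\prod_j x_j}{x_i}. $$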
In [44]:
# Let's test the example from class
# sum(log.(x))
gradient_check(x -> sum(log.(x)), rand(10), 
               x -> 1.0 ./x).maxdiff
Out[44]:
9.191653133941188e-8
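Here the gradient is immediate because the sum decouples over the entries:

$$ \frac{\partial}{\partial x_i}\sum_j \log x_j = \frac{1}{x_i}, $$

which is the elementwise 1.0 ./ x used above (x is drawn from rand, so every entry is positive and the logs are defined).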
In [66]:
gradient_check(X -> log(det(X)), rand(10,10), X->inv(X)).maxreldiff
Out[66]:
2.666572894810656
In [ ]:
## Let's see what's wrong...
In [67]:
"""
    finite_difference_gradient(f, x)

"""
function finite_difference_gradient(f, x; h=nothing)
    fx = f(x) # computed function 
    
    if h === nothing 
        h = sqrt(eps(fx))
    end 
    xi = copy(x)
    gxd = copy(x) 
    for i=1:length(x)
        xi[i] += h
        gxd[i] = (f(xi) - fx)/h
        xi[i] = x[i] # reset
    end
    return gxd 
end
Out[67]:
finite_difference_gradient
In [70]:
using StableRNGs
X1 = rand(StableRNG(2), 3,3)
finite_difference_gradient(X->log(det(X)), X1)
Out[70]:
3×3 Matrix{Float64}:
 -0.338137   2.67729  -1.16792
 -2.15245    1.93447   0.496364
  3.04566   -5.42069   2.03733
In [72]:
inv(X1)
Out[72]:
3×3 Matrix{Float64}:
 -0.338137  -2.15245    3.04566
  2.67729    1.93447   -5.42069
 -1.16792    0.496364   2.03733
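So yes: the finite-difference gradient is the transpose of inv(X1). That is consistent with the matrix-calculus identity (valid when det(X) > 0)

$$ \nabla_X \log\det X = (X^{-1})^T = X^{-T}, $$

so the gradient we should hand to gradient_check is X -> inv(X)', not X -> inv(X).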
In [83]:
# Is this transposed??????
gradient_check(X -> log(det(X)), rand(10,10), X->inv(X)').maxreldiff
DomainError with -0.026552566720242473:
log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)).

Stacktrace:
 [1] throw_complex_domainerror(f::Symbol, x::Float64)
   @ Base.Math ./math.jl:33
 [2] _log
   @ ./special/log.jl:295 [inlined]
 [3] log(x::Float64)
   @ Base.Math ./special/log.jl:261
 [4] #304
   @ ./In[83]:2 [inlined]
 [5] gradient_check(f::var"#304#306", x::Matrix{Float64}, g::var"#305#307")
   @ Main ./In[13]:11
 [6] top-level scope
   @ In[83]:2
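The DomainError has nothing to do with the transpose: a fresh rand(10,10) matrix can easily have a negative determinant, and then log(det(X)) throws before the check even runs. A minimal sketch of the retry, reusing the StableRNG matrix X1 from above (its determinant is positive, since log(det(X1)) evaluated without error earlier):

In [ ]:
# retry the transposed gradient on a matrix whose determinant is positive
gradient_check(X -> log(det(X)), X1, X -> inv(X)').maxreldiff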
In [70]:
# Let's test the example from class
# sum(exp(-(x).^2/sigmas))
sigmas = rand(10)
gradient_check(x -> sum(exp.((-(x).^2.0)./sigmas)), randn(10), 
               x -> (-2.0*x./sigmas).*exp.((-(x).^2.0)./sigmas)).maxdiff
Out[70]:
9.668389910899577e-8
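This gradient is again the chain rule applied elementwise:

$$ \frac{\partial}{\partial x_i}\sum_j e^{-x_j^2/\sigma_j} = -\frac{2 x_i}{\sigma_i}\, e^{-x_i^2/\sigma_i}, $$

which matches the expression passed as the gradient above.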
In [5]:
## The vec operation!

A = randn(5,4)
display(A)
a = vec(A)
5×4 Matrix{Float64}:
  1.86236   -0.65579     0.683274   -1.55886
  0.73896    0.146993    0.542514   -0.454353
 -1.0584     0.0780594  -0.459759   -1.0819
  0.271845   0.739824    0.1         0.18713
 -1.03668    0.870961   -0.0253258   1.0562
Out[5]:
20-element Vector{Float64}:
  1.8623561533513582
  0.7389599424959767
 -1.0583953069538454
  0.27184515729029757
 -1.0366843195874602
 -0.655790456444445
  0.14699285286682423
  0.07805944615915486
  0.739823575176113
  0.8709609935784469
  0.6832743550603724
  0.5425138397470901
 -0.4597594714688205
  0.10000039096208468
 -0.025325767326301356
 -1.5588642753853783
 -0.45435304621347966
 -1.0819015405001153
  0.18713023415677174
  1.0561950400877163
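vec(A) stacks the columns of A on top of each other, so for an m-by-n matrix the entry A[i,j] lands at linear position i + (j-1)*m, and reshape undoes the operation. A quick check of the identities we rely on when turning matrix derivatives into vector ones:

In [ ]:
# vec stacks columns (column-major order); reshape is its inverse
m, n = size(A)
@assert reshape(vec(A), m, n) == A
@assert vec(A)[2 + (3-1)*m] == A[2,3]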
In [75]:
## Try a least-squares objective.
A = randn(20,10)
b = randn(20)
gradient_check(x -> norm(A*x - b)^2, randn(10), x -> 2*A'*A*x - 2*A'*b).maxdiff
Out[75]:
6.195323720703527e-6
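The gradient comes from expanding the squared norm:

$$ \nabla_x \|Ax-b\|^2 = \nabla_x\left(x^T A^T A x - 2 b^T A x + b^T b\right) = 2A^T A x - 2A^T b = 2A^T(Ax - b). $$

The agreement is only about 1e-6 here; the function values and second derivatives (the entries of 2A'A) are larger than in the scalar examples, so the forward difference loses a couple of digits.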
In [94]:
## Try a log-det example. We need a matrix with a positive determinant;
## it's easiest to use a symmetric positive definite one, A'*A.
A = randn(20,20)
A = A'*A 
gradient_check(A -> log(det(A)), A, A -> inv(A)).maxdiff
Out[94]:
6.0620823887802544e-5
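Note that this check only passes with inv(A), without the transpose, because A = A'*A is symmetric, so its inverse is symmetric (up to roundoff) and the missing transpose is invisible. A quick way to see that, using the same A:

In [ ]:
# for a symmetric matrix, transposing the gradient is a no-op (up to roundoff)
norm(inv(A) - inv(A)')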
In [120]:
## Try the log-det example again with a nonsymmetric matrix.
## An upper-triangular matrix with positive diagonal entries has a positive determinant.
A = triu(rand(10,10)) # this matrix has pos. eigenvalues
gradient_check(A -> log(det(A)), A, A -> inv(A)').maxdiff
# Is this off due to something with the small diagonal entries?
Out[120]:
0.00014760922896073225
In [121]:
minimum(diag(A))
Out[121]:
0.025750457252672887
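The smallest diagonal entry is about 0.026, so inv(A) has some large entries and the curvature of log(det(A)) is correspondingly large; the forward difference then loses a few more digits than in the earlier examples, which is consistent with the maxdiff of about 1.5e-4. A sketch of one way to probe this, using the same A:

In [ ]:
# large entries in inv(A) (equivalently, a large condition number) signal a large
# gradient and curvature, which inflates the absolute finite-difference error
maximum(abs, inv(A)), cond(A)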