We start with code to validate that our derivative estimates are correct.
The first check is the PageRank example. Differentiating the PageRank system (I - alpha*P)*x(alpha) = (1 - alpha)*v, with v the uniform vector, with respect to alpha gives (I - alpha*P)*x'(alpha) = P*x(alpha) - v; this is the linear system that gpr solves below, and we compare it against a finite-difference estimate.
using SparseArrays
using LinearAlgebra
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
display(P)
pr(alpha) = (I - alpha*P) \ ((1 - alpha)*ones(size(P,1))/size(P,1)) # PageRank vector
gpr(alpha) = (I - alpha*P) \ (P*pr(alpha) - ones(size(P,1))/size(P,1)) # derivative of pr w.r.t. alpha
fdest = (pr(0.75 + 1e-5) .- pr(0.75)) / 1e-5 # forward finite-difference estimate
[fdest gpr(0.75)]
5×5 adjoint(::Matrix{Float64}) with eltype Float64:
 0.0  0.0  0.0  0.0  1.0
 0.5  0.0  0.0  0.0  0.0
 0.5  1.0  0.0  0.0  0.0
 0.0  0.0  1.0  0.0  0.0
 0.0  0.0  0.0  1.0  0.0
5×2 Matrix{Float64}:
  0.0393131   0.0393128
 -0.0797881  -0.0797884
 -0.010528   -0.0105278
  0.0180323   0.0180326
  0.0329707   0.0329708
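The two columns agree to about five digits, which matches the O(h) error of a forward difference with h = 1e-5. As a quick sketch beyond the original check, a central difference should tighten the agreement to roughly O(h^2):
h = 1e-5
fdest2 = (pr(0.75 + h) .- pr(0.75 - h)) / (2h) # central difference, O(h^2) error
maximum(abs.(fdest2 .- gpr(0.75))) # should be much smaller than the forward-difference gap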
using LinearAlgebra
"""
gradient_check(f, x, g)
Check that g seems to express the gradient of f by comparing
values computed with finite differences. This is based on the
gradientcheck function in the Poblano Matlab toolbox.
"""
function gradient_check(f, x, g)
fx = f(x) # computed function
gx = g(x) # putative gradient
h = sqrt(eps(fx))
xi = copy(x)
gxd = copy(gx)
for i=1:length(x)
xi[i] += h
gxd[i] = (f(xi) - fx)/h
xi[i] = x[i] # reset
end
absdiff = abs.(gxd .- gx)
return (g=gx, gfd=gxd, maxdiff=maximum(absdiff), normdiff=norm(gxd - gx))
end
gradient_check(x -> x'*x, randn(10), x -> 2*x).maxdiff
3.804766579573737e-8
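To see what a failing check looks like, here is a sketch with a deliberately wrong gradient that drops the factor of 2; maxdiff then comes out order one instead of order sqrt(eps):
gradient_check(x -> x'*x, randn(10), x -> x).maxdiff # wrong gradient, so maxdiff is O(1)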
# Let's test the example from class
# prod(x)^3
gradient_check(x -> prod(x)^3, randn(10),
x -> 3*prod(x)^2*prod(x)./x).maxdiff
1.459985074026957e-13
# Let's test the example from class
# sum(log(x))
gradient_check(x -> sum(log.(x)), rand(10),
x -> 1.0 ./x).maxdiff
5.253822985551437e-7
# Let's test the example from class
# sum(exp(-(x).^2/sigmas))
sigmas = rand(10)
gradient_check(x -> sum(exp.((-(x).^2.0)./sigmas)), randn(10),
x -> (-2.0*x./sigmas).*exp.((-(x).^2.0)./sigmas)).maxdiff
9.668389910899577e-8
## The vec operation!
A = randn(5,4)
display(A)
a = vec(A)
5×4 Matrix{Float64}:
  1.86236   -0.65579    0.683274   -1.55886
  0.73896    0.146993   0.542514   -0.454353
 -1.0584     0.0780594  -0.459759  -1.0819
  0.271845   0.739824    0.1        0.18713
 -1.03668    0.870961   -0.0253258  1.0562
20-element Vector{Float64}:
  1.8623561533513582
  0.7389599424959767
 -1.0583953069538454
  0.27184515729029757
 -1.0366843195874602
 -0.655790456444445
  0.14699285286682423
  0.07805944615915486
  0.739823575176113
  0.8709609935784469
  0.6832743550603724
  0.5425138397470901
 -0.4597594714688205
  0.10000039096208468
 -0.025325767326301356
 -1.5588642753853783
 -0.45435304621347966
 -1.0819015405001153
  0.18713023415677174
  1.0561950400877163
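The first five entries of a are exactly the first column of A, so vec stacks the columns in order (Julia is column-major), and reshape inverts it:
a[1:5] == A[:,1] # vec stacks the columns of A
reshape(a, 5, 4) == A # reshape undoes vec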
## Try a least-squares objective.
A = randn(20,10)
b = randn(20)
gradient_check(x -> norm(A*x - b)^2, randn(10), x -> 2*A'*A*x - 2*A'*b).maxdiff
6.195323720703527e-6
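As one more consistency check that follows directly from the formula, the gradient 2*A'*A*x - 2*A'*b must vanish at the least-squares solution:
xstar = A\b # least-squares solution
norm(2*A'*A*xstar - 2*A'*b) # essentially zero at the minimizer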
## Try a log-det example. We need a positive definite matrix for this;
## it's easiest to make one that is symmetric positive definite.
## (For symmetric A, the gradient inv(A)' equals inv(A).)
A = randn(20,20)
A = A'*A
gradient_check(A -> log(det(A)), A, A -> inv(A)).maxdiff
6.0620823887802544e-5
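A hedged aside: for larger matrices det itself overflows even though log(det) stays moderate, so LinearAlgebra's logdet, which works from a factorization, is the safer way to write this objective.
B = randn(200,200); B = B'*B
logdet(B) # fine, a moderate number
log(det(B)) # Inf, because det(B) overflows Float64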
## Try log-det again with a nonsymmetric matrix. An upper triangular
## matrix with a positive diagonal has positive eigenvalues, so det(A) > 0.
## For nonsymmetric A, the gradient of log(det(A)) is inv(A)'.
A = triu(rand(10,10)) # positive diagonal entries, hence positive eigenvalues
gradient_check(A -> log(det(A)), A, A -> inv(A)').maxdiff
# Is the larger error here due to A being nearly singular? Check the
# smallest diagonal entry, which is also its smallest eigenvalue.
0.00014760922896073225
minimum(diag(A))
0.025750457252672887
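One hedged way to probe that conjecture: a small diagonal entry means A is close to singular, so the entries of inv(A), and the second derivatives of log(det(A)), are large, which amplifies finite-difference error.
cond(A) # large when A is close to singular
norm(inv(A)) # the gradient entries themselves are large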