Automatic Differentiation¶

The automatic differentiation packages in Julia are quite sophisticated. They implement most of the standard chain rules from these documents by manipulating the syntax of your Julia functions.

This means you should keep your Julia function syntax as clean and simple as possible.

The package you want is Zygote, and the functions are gradient, jacobian, and hessian.
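As a quick preview, here is a minimal sketch of the three entry points on a toy function (my addition; it assumes Zygote is already installed, which the next cell does):

using Zygote
g(x) = sum(x.^2)                  # scalar-valued function of a vector
gradient(g, [1.0, 2.0])           # -> ([2.0, 4.0],)
jacobian(x -> x.^2, [1.0, 2.0])   # -> ([2.0 0.0; 0.0 4.0],) for a vector-valued function
hessian(g, [1.0, 2.0])            # -> [2.0 0.0; 0.0 2.0]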

In [1]:
#using Pkg; Pkg.add("Zygote")
    Updating registry at `~/.julia/registries/General`
    Updating git-repo `https://github.com/JuliaRegistries/General.git`
   Resolving package versions...
   Installed ZygoteRules ─── v0.2.3
   Installed LLVMExtra_jll ─ v0.0.18+0
   Installed IRTools ─────── v0.4.9
   Installed GPUArrays ───── v8.6.2
   Installed Zygote ──────── v0.6.55
   Installed ChainRules ──── v1.48.0
   Installed LLVM ────────── v4.17.1
    Updating `~/.julia/environments/v1.8/Project.toml`
⌃ [e88e6eb3] + Zygote v0.6.55
    Updating `~/.julia/environments/v1.8/Manifest.toml`
  [082447d4] + ChainRules v1.48.0
⌃ [0c68f7d7] + GPUArrays v8.6.2
  [7869d1d1] + IRTools v0.4.9
⌅ [929cbde3] + LLVM v4.17.1
⌃ [e88e6eb3] + Zygote v0.6.55
  [700de1a5] + ZygoteRules v0.2.3
⌅ [dad2f222] + LLVMExtra_jll v0.0.18+0
        Info Packages marked with ⌃ and ⌅ have new versions available, but those with ⌅ are restricted by compatibility constraints from upgrading. To see why use `status --outdated -m`
Precompiling project...
  ✓ LLVMExtra_jll
  ✓ ZygoteRules
  ✓ IRTools
  ✓ LLVM
  ✓ ChainRules
  ✓ GPUArrays
  ✓ Zygote
  7 dependencies successfully precompiled in 8 seconds. 411 already precompiled. 5 skipped during auto due to previous errors.

ChatGPT wrote the following code for me in response to a prompt about using Julia to compute a gradient automatically.

In [92]:
using Zygote

function f(alpha, sigma)
    sum(exp.(-(alpha.^2) ./ sigma))
end

# Compute the gradient of f with respect to alpha and sigma
gradient_f(alpha, sigma) = gradient(f, alpha, sigma)

# Test the gradient_f function
alpha_test = 1.0
sigma_test = 2.0

grad_alpha, grad_sigma = gradient_f(alpha_test, sigma_test)

println("Gradient with respect to alpha: ", grad_alpha)
println("Gradient with respect to sigma: ", grad_sigma)
Gradient with respect to alpha: -0.6065306597126334
Gradient with respect to sigma: 0.15163266492815836
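As a sanity check (my addition), the partial derivatives of exp(-alpha^2/sigma) are -2*alpha/sigma * exp(-alpha^2/sigma) with respect to alpha and alpha^2/sigma^2 * exp(-alpha^2/sigma) with respect to sigma; evaluating them at the test point reproduces the numbers above:

# analytic partials of exp(-alpha^2/sigma) at alpha = 1.0, sigma = 2.0
(-2*alpha_test/sigma_test*exp(-alpha_test^2/sigma_test),
  alpha_test^2/sigma_test^2*exp(-alpha_test^2/sigma_test))
# ≈ (-0.60653, 0.15163), matching the Zygote output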
In [93]:
#alpha_test = [1.0,2.0]
#sigma_test = 2.0
using LinearAlgebra
"""
    finite_difference_gradient(f, x)

"""
function finite_difference_gradient(f, x; h=nothing)
    fx = f(x) # computed function 
    
    if h === nothing 
        h = sqrt(eps(fx))
    end 
    xi = copy(x)
    gxd = copy(x) 
    for i=1:length(x)
        xi[i] += h
        gxd[i] = (f(xi) - fx)/h
        xi[i] = x[i] # reset
    end
    return gxd 
end 
"""
    gradient_check(f, x, g)

Check that g seems to express the gradient of f by comparing
values computed with finite differences. This is based on the 
gradientcheck function in the Poblano Matlab toolbox.
"""
function gradient_check(f, x, g)
    gx = g(x) # putative gradient
    
    gxd = finite_difference_gradient(f, x)
        
    absdiff = abs.(gxd .- gx)
    relabsdiff = abs.(gxd .- gx) ./ max.(abs.(gx), 1.0)
    
    return (g=gx, gfd=gxd, maxdiff=maximum(absdiff), 
            maxreldiff = maximum(relabsdiff),            
            normdiff=norm(gxd - gx))
end
gradient_check(x -> x'*x, randn(10), x -> 2*x).maxdiff
Out[93]:
4.779139217703232e-8
In [107]:
sigmas = rand(10)
gradient_check(x -> f(x, sigmas), randn(10), 
               x -> gradient_f(x, sigmas)[1]).maxdiff
Out[107]:
6.129271010735238e-7
In [110]:
using LinearAlgebra
function logdet(X)   # note: this defines our own logdet (LinearAlgebra exports one too)
    log(det(X))
end
gradlogdet(X) = gradient(logdet, X)
gradlogdet(triu(rand(10,10)))
Out[110]:
([1.124998654396587 0.0 … 0.0 0.0; -0.5326680961029698 1.36256303520933 … 0.0 0.0; … ; 2.568782278236005 6.726872059783083 … 4.9216580080856795 0.0; 3.3035474626954975 8.633931237814297 … -1.0912479621173081 1.2954401924752859],)
In [123]:
gradient_check(x->logdet(x), triu(rand(10,10)), 
               x->gradlogdet(x)[1]).maxdiff
Out[123]:
0.0007923966184364417
In [124]:
using StableRNGs
X = triu(rand(StableRNG(3), 10,10))
gradlogdet(X)[1]
Out[124]:
10×10 Matrix{Float64}:
     2.73632     0.0          0.0         …   0.0        0.0       0.0
   -13.8578     25.0635       0.0             0.0        0.0       0.0
    68.0617   -155.79         9.86121         0.0        0.0       0.0
     6.44452   -40.3972      -3.2654          0.0        0.0       0.0
   -13.8235     45.325       -1.71273         0.0        0.0       0.0
  -135.885     313.995      -13.7         …   0.0        0.0       0.0
    69.0426   -149.992        4.37892         0.0        0.0       0.0
    -3.49697    -0.778595    -0.00974034      1.20345    0.0       0.0
   395.881    -751.759       48.3518         -6.06897    6.34153   0.0
 -4568.44     8938.47      -563.165          54.0732   -68.5513   11.2178
The gradient of log det X is known in closed form: it is the transpose of the inverse matrix, so inv(X)' should match the Zygote result above.

In [125]:
inv(X)'
Out[125]:
10×10 adjoint(::Matrix{Float64}) with eltype Float64:
     2.73632     0.0          0.0         …   0.0        0.0       0.0
   -13.8578     25.0635       0.0             0.0        0.0       0.0
    68.0617   -155.79         9.86121         0.0        0.0       0.0
     6.44452   -40.3972      -3.2654          0.0        0.0       0.0
   -13.8235     45.325       -1.71273         0.0        0.0       0.0
  -135.885     313.995      -13.7         …   0.0        0.0       0.0
    69.0426   -149.992        4.37892         0.0        0.0       0.0
    -3.49697    -0.778595    -0.00974034      1.20345    0.0       0.0
   395.881    -751.759       48.3518         -6.06897    6.34153   0.0
 -4568.44     8938.47      -563.165          54.0732   -68.5513   11.2178
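A quick numeric check (my addition) that the two matrices agree:

maximum(abs.(gradlogdet(X)[1] - inv(X)'))   # should be tiny relative to the entries above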
In [127]:
gradient_check(x->logdet(x), X, 
               x->gradlogdet(x)[1]).maxdiff
Out[127]:
1.683266373760489
That absolute difference looks large, but the gradient entries themselves are enormous here, so the relative difference is the more meaningful check.

In [129]:
gradient_check(x->logdet(x), X, 
               x->gradlogdet(x)[1]).maxreldiff
Out[129]:
0.00018831696676604022
These are the denominators gradient_check uses for the relative difference: the gradient magnitudes, floored at 1.

In [131]:
max.(abs.(inv(X)'), 1.0)
Out[131]:
10×10 Matrix{Float64}:
    2.73632     1.0       1.0        1.0      …   1.0       1.0       1.0
   13.8578     25.0635    1.0        1.0          1.0       1.0       1.0
   68.0617    155.79      9.86121    1.0          1.0       1.0       1.0
    6.44452    40.3972    3.2654     8.45944      1.0       1.0       1.0
   13.8235     45.325     1.71273    3.14934      1.0       1.0       1.0
  135.885     313.995    13.7        5.15814  …   1.0       1.0       1.0
   69.0426    149.992     4.37892    2.35577      1.0       1.0       1.0
    3.49697     1.0       1.0        1.3294       1.20345   1.0       1.0
  395.881     751.759    48.3518    30.1474       6.06897   6.34153   1.0
 4568.44     8938.47    563.165    291.629       54.0732   68.5513   11.2178
In [8]:
using LinearAlgebra, SparseArrays
function pagerank(a,P,v)
    (I-a*P)\((1-a)*v)   # solve (I - a*P) x = (1 - a) v
end
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
prgrad(a) = gradient(pagerank, a, P, ones(n)/5)
Out[8]:
prgrad (generic function with 1 method)
In [9]:
gradient_check(x->pagerank(x,P,ones(n)/5), 0.85, 
               x->prgrad(x)[1]).maxdiff
Output is an array, so the gradient is not defined. Perhaps you wanted jacobian.

Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:35
 [2] sensitivity(y::Vector{Float64})
   @ Zygote ~/.julia/packages/Zygote/HdT4O/src/compiler/interface.jl:119
 [3] gradient(::Function, ::Float64, ::Vararg{Any})
   @ Zygote ~/.julia/packages/Zygote/HdT4O/src/compiler/interface.jl:154
 [4] prgrad(a::Float64)
   @ Main ./In[8]:10
 [5] #14
   @ ./In[9]:2 [inlined]
 [6] gradient_check(f::var"#13#15", x::Float64, g::var"#14#16")
   @ Main ./In[2]:13
 [7] top-level scope
   @ In[9]:1
As the error message suggests, pagerank returns a vector, so its derivative with respect to a is a vector and we need jacobian rather than gradient.

In [10]:
using LinearAlgebra, SparseArrays
function pagerank(a,P,v)
    (I-a*P)\((1-a)*v)
end
n = 5
P = Matrix(sparse(1:n, mod.(1:n, n) .+ 1, 1.0, n, n)) # make a cycle matrix
P[1,2] = 0.5
P[1,3] = 0.5
P = P'
prgrad(a) = jacobian(pagerank, a, P, ones(n)/5)
Out[10]:
prgrad (generic function with 1 method)
In [58]:
[(pagerank(0.75+sqrt(eps(1.0)),P,ones(n)/5) - 
    pagerank(0.75,P,ones(n)/5))/sqrt(eps(1.0))  prgrad(0.75)[1]]
Out[58]:
5×2 Matrix{Float64}:
  0.0393128   0.0393128
 -0.0797884  -0.0797884
 -0.0105278  -0.0105278
  0.0180326   0.0180326
  0.0329708   0.0329708
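We can also derive this derivative by hand (my derivation, not in the original): differentiating (I - a*P)*x(a) = (1-a)*v with respect to a gives (I - a*P)*x'(a) = P*x(a) - v, so:

xpr = pagerank(0.75, P, ones(n)/5)
(I - 0.75*P) \ (P*xpr - ones(n)/5)   # should match both columns above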
Zygote's hessian expects a function of a single vector argument, so f2 below packs alpha and sigma into one array. The call that follows still hands the two-argument f to hessian, however, which is what triggers the MethodError.

In [30]:
function f2(x) 
    n = length(x) ÷ 2
    alpha = x[1:n]
    sigma = x[n+1:end]
    f(alpha, sigma)
end 
hessian_f(alpha,sigma) = hessian(f, [alpha, sigma])
hessian_f(alpha_test, sigma_test)
MethodError: no method matching f(::Vector{ForwardDiff.Dual{Nothing, Float64, 2}})
Closest candidates are:
  f(::Any, ::Any) at In[5]:3

Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0 [inlined]
  [2] _pullback(ctx::Zygote.Context{false}, f::typeof(f), args::Vector{ForwardDiff.Dual{Nothing, Float64, 2}})
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:9
  [3] pullback(f::Function, cx::Zygote.Context{false}, args::Vector{ForwardDiff.Dual{Nothing, Float64, 2}})
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:44
  [4] pullback
    @ ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:42 [inlined]
  [5] gradient(f::Function, args::Vector{ForwardDiff.Dual{Nothing, Float64, 2}})
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:96
  [6] (::Zygote.var"#114#115"{typeof(f)})(x::Vector{ForwardDiff.Dual{Nothing, Float64, 2}})
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/lib/grad.jl:64
  [7] forward_jacobian(f::Zygote.var"#114#115"{typeof(f)}, x::Vector{Float64}, #unused#::Val{2})
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/lib/forward.jl:29
  [8] forward_jacobian(f::Function, x::Vector{Float64}; chunk_threshold::Int64)
    @ Zygote ~/.julia/packages/Zygote/g2w9o/src/lib/forward.jl:44
  [9] forward_jacobian
    @ ~/.julia/packages/Zygote/g2w9o/src/lib/forward.jl:42 [inlined]
 [10] hessian_dual
    @ ~/.julia/packages/Zygote/g2w9o/src/lib/grad.jl:64 [inlined]
 [11] hessian
    @ ~/.julia/packages/Zygote/g2w9o/src/lib/grad.jl:62 [inlined]
 [12] hessian_f(alpha::Float64, sigma::Float64)
    @ Main ./In[30]:7
 [13] top-level scope
    @ In[30]:8
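Presumably the call was meant to use the wrapper f2 rather than f itself; a minimal sketch of that fix (my guess at the intent, not run in the original notebook):

hessian_f2(alpha, sigma) = hessian(f2, [alpha, sigma])   # pack both scalars into one vector
hessian_f2(alpha_test, sigma_test)                       # 2×2 matrix of second partial derivatives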
In [31]:
function simplef(alphas)
    sum(exp.(-(alphas.^2) ./ sigmas))   # sigmas is the global vector set on the next line
end
sigmas = rand(10)
hessian_f(alphas) = hessian(simplef, alphas)
alphas = randn(10)
hessian_f(alphas)
Out[31]:
10×10 Matrix{Float64}:
  1.7684  -0.0     0.0      -0.0      …  -0.0      -0.0       0.0
 -0.0     -6.4505  0.0      -0.0         -0.0      -0.0       0.0
 -0.0     -0.0     2.18349  -0.0         -0.0      -0.0       0.0
 -0.0     -0.0     0.0       0.62219     -0.0      -0.0       0.0
 -0.0     -0.0     0.0      -0.0         -0.0      -0.0       0.0
 -0.0     -0.0     0.0      -0.0      …  -0.0      -0.0       0.0
 -0.0     -0.0     0.0      -0.0         -0.0      -0.0       0.0
 -0.0     -0.0     0.0      -0.0         -2.40502  -0.0       0.0
 -0.0     -0.0     0.0      -0.0         -0.0       0.102418  0.0
 -0.0     -0.0     0.0      -0.0         -0.0      -0.0       1.94047
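Each term of simplef depends on a single entry of alphas, so the Hessian should be diagonal; we can confirm that (my addition):

isdiag(hessian_f(alphas))   # expected: true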