In [1]:
using Plots, LinearAlgebra, Random, SparseArrays
using Optim
In [2]:
# example of the Rosenbrock function
# NOTE: this doesn't work; see the next cell!
# See, these things are hard to use :)
function happyf(x) 
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
# BUG (on purpose): the arguments are in the wrong order; Optim expects g!(storage, x)
function sadg!(x::Vector, storage::Vector)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end

soln = optimize(happyf, sadg!, [0.0, 0.0], GradientDescent())
Out[2]:
 * Status: failure

 * Candidate solution
    Final objective value:     NaN

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 0.00e+00 ≤ 0.0e+00
    |x - x'|/|x'|          = NaN ≰ 0.0e+00
    |f(x) - f(x')|         = NaN ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = NaN ≰ 0.0e+00
    |g(x)|                 = NaN ≰ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    0
    f(x) calls:    1
    ∇f(x) calls:   1
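Why did that fail? Optim calls the gradient as g!(storage, x), and sadg! has those arguments reversed, so the gradient gets written into the iterate while the buffer Optim reads is never filled. A quick illustration of the mix-up (just a sketch; G here stands in for Optim's gradient buffer):

G = fill(NaN, 2)      # stand-in for Optim's (unfilled) gradient buffer
x0 = [0.0, 0.0]
sadg!(G, x0)          # called the way Optim calls it: storage first, point second
@show G               # unchanged: the buffer was only read, never written
@show x0              # clobbered with NaN: it was used as "storage"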
In [3]:
# example of Rosenbrock function
function f(x) 
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
# gradient with the argument order Optim.jl expects: g!(storage, x)
function g!(storage::Vector, x::Vector)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end

soln = optimize(f, g!, [0.0, 0.0], BFGS())
Out[3]:
 * Status: success

 * Candidate solution
    Final objective value:     7.645688e-21

 * Found with
    Algorithm:     BFGS

 * Convergence measures
    |x - x'|               = 3.48e-07 ≰ 0.0e+00
    |x - x'|/|x'|          = 3.48e-07 ≰ 0.0e+00
    |f(x) - f(x')|         = 6.91e-14 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 9.03e+06 ≰ 0.0e+00
    |g(x)|                 = 2.32e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    16
    f(x) calls:    53
    ∇f(x) calls:   53
In [5]:
soln.minimizer
Out[5]:
2-element Vector{Float64}:
 0.9999999999373603
 0.9999999998686199
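The true minimum of the Rosenbrock function is at (1, 1) with value 0, so this looks right. We can also confirm the gradient is numerically zero at the reported minimizer (a quick sketch using the g! defined above):

gmin = zeros(2)
g!(gmin, soln.minimizer)
@show norm(gmin)      # tiny, consistent with the |g(x)| line in the report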
In [6]:
soln = optimize(f, g!, [0.0, 0.0], GradientDescent())
Out[6]:
 * Status: failure (reached maximum number of iterations)

 * Candidate solution
    Final objective value:     4.154719e-03

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 1.82e-04 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.95e-04 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.18e-06 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.97e-03 ≰ 0.0e+00
    |g(x)|                 = 8.21e-02 ≰ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    1000
    f(x) calls:    2532
    ∇f(x) calls:   2532
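Rather than reading the report, we can also query the result programmatically; Optim provides accessors such as Optim.converged, Optim.iterations, and Optim.minimum (a quick sketch):

@show Optim.converged(soln)    # false: gradient descent hit the 1000-iteration cap
@show Optim.iterations(soln)   # 1000
@show Optim.minimum(soln)      # the best objective value it reached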
In [9]:
# ChatGPT gave us this line to increase the number of iterations...
soln = optimize(f, g!, [0.0, 0.0], GradientDescent(), Optim.Options(iterations=30000))
Out[9]:
 * Status: success

 * Candidate solution
    Final objective value:     7.369388e-17

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 2.00e-11 ≰ 0.0e+00
    |x - x'|/|x'|          = 2.00e-11 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.19e-19 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.61e-03 ≰ 0.0e+00
    |g(x)|                 = 9.99e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    20458
    f(x) calls:    51177
    ∇f(x) calls:   51177
In [11]:
# now, we do the matrix factorization example
# originally from Poblano example2

function matrix_approx_function(x::Vector, A::Matrix, r::Int)
    # unpack U and V from x
    m,n = size(A)
    U = reshape(x[1:m*r],m,r)
    V = reshape(x[(m*r+1):end],n,r)
    return 0.5*norm(A - U*V')^2 
end

function matrix_approx_gradient!(storage::Vector, x::Vector, A::Matrix, r::Int)
    # for f(U,V) = 0.5*||A - U*V'||^2 the gradients are
    #   df/dU = -(A - U*V')*V   and   df/dV = -(A - U*V')'*U
    m,n = size(A)
    U = reshape(x[1:m*r],m,r)
    V = reshape(x[(m*r+1):end],n,r)
    D = A - U*V'
    storage[1:(m*r)] = -vec(D*V)
    storage[(m*r+1):end] = -vec(D'*U)
end
Out[11]:
matrix_approx_gradient! (generic function with 1 method)
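The vector x packs U first and then V, both in column-major order, which is why vec and reshape round-trip cleanly. A tiny sanity check of that convention (a sketch; the *test names are made up here):

mtest, ntest, rtest = 3, 2, 2
Utest = randn(mtest, rtest); Vtest = randn(ntest, rtest)
xtest = [vec(Utest); vec(Vtest)]                            # same packing the functions above assume
@show matrix_approx_function(xtest, Utest*Vtest', rtest)    # ≈ 0 by construction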
In [ ]:
# Ideally, we'd check our gradient here!!!
# David foolishly asserts it's correct. We shouldn't trust him.
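One way to do that check: compare the analytic gradient against centered finite differences on a small random instance (a sketch; the test sizes and step size here are ad hoc):

Random.seed!(0)
mt, nt, rt = 4, 3, 2
At = randn(mt, nt)
xt = randn(mt*rt + nt*rt)
g_analytic = zeros(length(xt))
matrix_approx_gradient!(g_analytic, xt, At, rt)
h = 1e-6
g_fd = map(1:length(xt)) do i
    e = zeros(length(xt)); e[i] = h
    (matrix_approx_function(xt + e, At, rt) - matrix_approx_function(xt - e, At, rt)) / (2h)
end
@show norm(g_analytic - g_fd)   # should be tiny (roughly 1e-8 or smaller) if the gradient is right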
In [14]:
using Random
Random.seed!(42)
m = 10
n = 8
A = randn(m,n)
#A = Matrix(1.0I,m,n)
r = 8   # r = min(m,n), so an exact factorization A = U*V' is achievable
myf = x -> matrix_approx_function(x, A, r)
myg! = (storage, x) -> matrix_approx_gradient!(storage, x, A, r)

soln = optimize(myf, myg!, ones(m*r+n*r), BFGS(), Optim.Options(f_tol = 1e-10))
#soln = optimize(myf, myg!, randn(m*r+n*r), BFGS(), Optim.Options(f_tol = 1e-8))
x = Optim.minimizer(soln)
@show soln
Uopt = reshape(x[1:m*r],m,r)
Vopt = reshape(x[(m*r+1):end],n,r)
objval = 2*myf(x)              # 2*(0.5*||A - U*V'||^2) = ||A - U*V'||^2
opterr = norm(A-Uopt*Vopt')^2  # should match objval

Usvd,Ssvd,Vsvd = svd(A)
svderr = norm(A-Usvd[:,1:r]*Diagonal(Ssvd[1:r])*Vsvd[:,1:r]')^2
@show objval
@show opterr
@show svderr
; # hide final output in JuliaBox
soln =  * Status: success

 * Candidate solution
    Final objective value:     1.182346e-17

 * Found with
    Algorithm:     BFGS

 * Convergence measures
    |x - x'|               = 3.92e-09 ≰ 0.0e+00
    |x - x'|/|x'|          = 7.09e-10 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.05e-16 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 8.90e+00 ≰ 1.0e-10
    |g(x)|                 = 9.16e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    130
    f(x) calls:    317
    ∇f(x) calls:   317

objval = 2.364691437144303e-17
opterr = 2.364691437144303e-17
svderr = 1.2555705456793983e-28
In [15]:
Uopt
Out[15]:
10×8 Matrix{Float64}:
 -3.33735   -2.19524   -2.53906   …  -2.54182   -2.80942   -2.89024
 -3.7234    -4.18208   -3.68367      -4.93599   -4.45906   -3.56531
 -0.983073   0.348816   0.849748      0.163245   0.238055   0.727
  4.038      4.99166    4.58689       4.27451    5.5284     5.27877
  0.564552  -0.613908   0.332522      0.142497   0.33654    0.710277
  3.02217    1.44327    1.31406   …   1.95573    1.31999    2.43178
  0.124511   0.702701   1.5509        1.36139    0.167772   0.747722
  3.01424    5.12274    3.71072       4.67978    4.09678    3.2948
  4.45295    4.73057    3.90561       2.9314     3.33936    3.18532
  2.91507    2.16179    2.13676       2.62318    2.44848    2.30038
In [16]:
Vopt
Out[16]:
8×8 Matrix{Float64}:
  0.0924827  -0.854372  -0.565314  …   0.292707    0.617051    0.411362
  0.387538   -0.40823    0.206447      0.126999   -0.539906    0.815118
 -0.0532201  -0.720962   0.800373     -0.129858   -0.154867    0.60556
 -0.785825    0.095415   0.646872     -0.219982   -0.0859667   0.880899
 -0.331012    0.113304   0.142373      0.27635     0.258298    0.47644
  0.540078    0.236838  -0.365907  …  -1.13269     0.55546     0.218527
  0.0694765  -0.404263   0.447314     -0.558633   -0.38241    -0.427689
  0.415032    0.234113   0.189276     -0.0432092   0.366161    0.52138
In [17]:
Usvd
Out[17]:
10×8 Matrix{Float64}:
  0.259466   -0.381811   -0.275723   …   0.144914   -0.151181    0.724038
  0.435111    0.233448    0.0830214     -0.420493   -0.475092   -0.00147801
 -0.0288846   0.608252   -0.539545      -0.0986429   0.292082    0.0335627
 -0.513671    0.333858   -0.0538174      0.258408   -0.675488    0.133151
 -0.0566207  -0.0354408   0.205559       0.279143    0.186201    0.251128
 -0.177233    0.261441    0.413036   …  -0.503843    0.120149    0.562817
 -0.155586   -0.379952   -0.381767      -0.596319   -0.115871   -0.11921
 -0.41661    -0.154051   -0.401148      -0.033717    0.0310957   0.148548
 -0.417507   -0.0833247   0.192219      -0.173909    0.305561    0.0334984
 -0.26826    -0.271378    0.258094      -0.0832108  -0.230149   -0.199207
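Note that Uopt looks nothing like Usvd even though both factorizations reconstruct A: the factorization U*V' is not unique, and unlike the SVD factors the BFGS factors are not orthonormal. A quick check (a sketch):

@show norm(Usvd' * Usvd - I)                              # essentially zero: SVD columns are orthonormal
@show norm(Uopt' * Uopt - I)                              # not small: the BFGS factors are not orthonormal
@show norm(Uopt * Vopt' - Usvd * Diagonal(Ssvd) * Vsvd')  # essentially zero: both reconstruct A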